diff --git a/src/asahi/lib/agx_nir_lower_vbo.c b/src/asahi/lib/agx_nir_lower_vbo.c index 1becc398edc..eb0d120c777 100644 --- a/src/asahi/lib/agx_nir_lower_vbo.c +++ b/src/asahi/lib/agx_nir_lower_vbo.c @@ -119,7 +119,7 @@ pass(struct nir_builder *b, nir_instr *instr, void *data) unsigned index = nir_intrinsic_base(intr) + nir_src_as_uint(*offset_src); struct agx_attribute attrib = vbufs->attributes[index]; - uint32_t stride = vbufs->strides[attrib.buf]; + uint32_t stride = attrib.stride; uint16_t offset = attrib.src_offset; const struct util_format_description *desc = diff --git a/src/asahi/lib/agx_nir_lower_vbo.h b/src/asahi/lib/agx_nir_lower_vbo.h index ddf5c31ca56..640d84869e8 100644 --- a/src/asahi/lib/agx_nir_lower_vbo.h +++ b/src/asahi/lib/agx_nir_lower_vbo.h @@ -23,6 +23,7 @@ extern "C" { */ struct agx_attribute { uint32_t divisor; + uint32_t stride; uint16_t src_offset; uint8_t buf; @@ -32,7 +33,6 @@ struct agx_attribute { struct agx_vbufs { unsigned count; - uint32_t strides[AGX_MAX_VBUFS]; struct agx_attribute attributes[AGX_MAX_ATTRIBS]; }; diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 18df562e376..d2a37e5ad5b 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -430,6 +430,8 @@ draw_set_vertex_elements(struct draw_context *draw, memcpy(draw->pt.vertex_element, elements, count * sizeof(elements[0])); draw->pt.nr_vertex_elements = count; + for (unsigned i = 0; i < count; i++) + draw->pt.vertex_strides[elements[i].vertex_buffer_index] = elements[i].src_stride; } diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 4e8b0011dc7..3516fce6325 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -247,26 +247,23 @@ create_jit_vertex_buffer_type(struct gallivm_state *gallivm, const char *struct_name) { LLVMTargetDataRef target = gallivm->target; - LLVMTypeRef 
elem_types[4]; + LLVMTypeRef elem_types[3]; LLVMTypeRef vb_type; - elem_types[0] = LLVMInt16TypeInContext(gallivm->context); - elem_types[1] = LLVMInt8TypeInContext(gallivm->context); - elem_types[2] = LLVMInt32TypeInContext(gallivm->context); - elem_types[3] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); + elem_types[0] = LLVMInt8TypeInContext(gallivm->context); + elem_types[1] = LLVMInt32TypeInContext(gallivm->context); + elem_types[2] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); vb_type = LLVMStructTypeInContext(gallivm->context, elem_types, ARRAY_SIZE(elem_types), 0); (void) target; /* silence unused var warning for non-debug build */ - LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride, - target, vb_type, 0); LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, is_user_buffer, - target, vb_type, 1); + target, vb_type, 0); LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset, - target, vb_type, 2); + target, vb_type, 1); LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer.resource, - target, vb_type, 3); + target, vb_type, 2); LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, target, vb_type); @@ -1742,14 +1739,14 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) util_format_get_blocksize(velem->src_format)); LLVMValueRef src_offset = lp_build_const_int32(gallivm, velem->src_offset); + LLVMValueRef src_stride = lp_build_const_int32(gallivm, + velem->src_stride); struct lp_build_if_state if_ctx; if (velem->src_format != PIPE_FORMAT_NONE) { vbuffer_ptr = LLVMBuildGEP2(builder, variant->buffer_type, vbuffers_ptr, &vb_index, 1, ""); vb_info = LLVMBuildGEP2(builder, variant->vb_type, vb_ptr, &vb_index, 1, ""); - vb_stride[j] = draw_jit_vbuffer_stride(gallivm, variant->vb_type, vb_info); - vb_stride[j] = LLVMBuildZExt(gallivm->builder, vb_stride[j], - LLVMInt32TypeInContext(context), ""); + vb_stride[j] = src_stride; vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, variant->vb_type, 
vb_info); map_ptr[j] = draw_jit_dvbuffer_map(gallivm, variant->buffer_type, vbuffer_ptr); buffer_size = draw_jit_dvbuffer_size(gallivm, variant->buffer_type, vbuffer_ptr); diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index 2f66c807c55..65199feab1b 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -79,11 +79,8 @@ enum { lp_build_struct_get2(_variant->gallivm, _variant->context_type, _ptr, DRAW_VS_JIT_CTX_VIEWPORT, "viewports") -#define draw_jit_vbuffer_stride(_gallivm, _type, _ptr) \ - lp_build_struct_get2(_gallivm, _type, _ptr, 0, "stride") - #define draw_jit_vbuffer_offset(_gallivm, _type, _ptr) \ - lp_build_struct_get2(_gallivm, _type, _ptr, 2, "buffer_offset") + lp_build_struct_get2(_gallivm, _type, _ptr, 1, "buffer_offset") enum { DRAW_JIT_DVBUFFER_MAP = 0, diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 670b4829dc9..9d3a465c511 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -207,6 +207,7 @@ struct draw_context */ unsigned max_index; + unsigned vertex_strides[PIPE_MAX_ATTRIBS]; struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; unsigned nr_vertex_elements; diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 197548ee657..e983bd7dafd 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -304,7 +304,7 @@ draw_print_arrays(struct draw_context *draw, enum mesa_prim prim, } ptr += draw->pt.vertex_buffer[buf].buffer_offset; - ptr += draw->pt.vertex_buffer[buf].stride * ii; + ptr += draw->pt.vertex_element[j].src_stride * ii; ptr += draw->pt.vertex_element[j].src_offset; debug_printf(" Attr %u: ", j); @@ -444,7 +444,8 @@ resolve_draw_info(const struct pipe_draw_info *raw_info, const struct pipe_draw_start_count_bias *raw_draw, struct pipe_draw_info *info, struct 
pipe_draw_start_count_bias *draw, - struct pipe_vertex_buffer *vertex_buffer) + struct pipe_vertex_buffer *vertex_buffer, + struct pipe_vertex_element *vertex_element) { *info = *raw_info; *draw = *raw_draw; @@ -452,8 +453,8 @@ resolve_draw_info(const struct pipe_draw_info *raw_info, struct draw_so_target *target = (struct draw_so_target *)indirect->count_from_stream_output; assert(vertex_buffer != NULL); - draw->count = vertex_buffer->stride == 0 ? 0 : - target->internal_offset / vertex_buffer->stride; + draw->count = vertex_element->src_stride == 0 ? 0 : + target->internal_offset / vertex_element->src_stride; /* Stream output draw can not be indexed */ assert(!info->index_size); @@ -527,7 +528,8 @@ draw_vbo(struct draw_context *draw, if (indirect && indirect->count_from_stream_output) { resolve_draw_info(info, indirect, &draws[0], &resolved_info, - &resolved_draw, &(draw->pt.vertex_buffer[0])); + &resolved_draw, &(draw->pt.vertex_buffer[0]), + &(draw->pt.vertex_element[0])); use_info = &resolved_info; use_draws = &resolved_draw; num_draws = 1; @@ -562,18 +564,18 @@ draw_vbo(struct draw_context *draw, if (0) { debug_printf("Elements:\n"); for (unsigned i = 0; i < draw->pt.nr_vertex_elements; i++) { - debug_printf(" %u: src_offset=%u inst_div=%u vbuf=%u format=%s\n", + debug_printf(" %u: src_offset=%u src_stride=%u inst_div=%u vbuf=%u format=%s\n", i, draw->pt.vertex_element[i].src_offset, + draw->pt.vertex_element[i].src_stride, draw->pt.vertex_element[i].instance_divisor, draw->pt.vertex_element[i].vertex_buffer_index, util_format_name(draw->pt.vertex_element[i].src_format)); } debug_printf("Buffers:\n"); for (unsigned i = 0; i < draw->pt.nr_vertex_buffers; i++) { - debug_printf(" %u: stride=%u offset=%u size=%d ptr=%p\n", + debug_printf(" %u: offset=%u size=%d ptr=%p\n", i, - draw->pt.vertex_buffer[i].stride, draw->pt.vertex_buffer[i].buffer_offset, (int) draw->pt.user.vbuffer[i].size, draw->pt.user.vbuffer[i].map); diff --git 
a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 910491397d0..f93e518b226 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -137,7 +137,6 @@ draw_pt_fetch_prepare(struct pt_fetch *fetch, } } - void draw_pt_fetch_run(struct pt_fetch *fetch, const unsigned *elts, @@ -152,7 +151,7 @@ draw_pt_fetch_run(struct pt_fetch *fetch, i, ((char *)draw->pt.user.vbuffer[i].map + draw->pt.vertex_buffer[i].buffer_offset), - draw->pt.vertex_buffer[i].stride, + draw->pt.vertex_strides[i], draw->pt.max_index); } @@ -179,7 +178,7 @@ draw_pt_fetch_run_linear(struct pt_fetch *fetch, i, ((char *)draw->pt.user.vbuffer[i].map + draw->pt.vertex_buffer[i].buffer_offset), - draw->pt.vertex_buffer[i].stride, + draw->pt.vertex_strides[i], draw->pt.max_index); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 26906c53115..0dd62425101 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -110,14 +110,11 @@ fse_prepare(struct draw_pt_middle_end *middle, */ fse->key.element[i].in.buffer = src->vertex_buffer_index; fse->key.element[i].in.offset = src->src_offset; + if (src->src_stride == 0) + fse->key.const_vbuffers |= (1<<src->vertex_buffer_index); nr_vbs = MAX2(nr_vbs, src->vertex_buffer_index + 1); } - for (unsigned i = 0; i < 5 && i < nr_vbs; i++) { - if (draw->pt.vertex_buffer[i].stride == 0) - fse->key.const_vbuffers |= (1<<i); - } - if (0) debug_printf("%s: lookup const_vbuffers: %x\n", __func__, fse->key.const_vbuffers); @@ -160,7 +157,7 @@ fse_prepare(struct draw_pt_middle_end *middle, i, ((const uint8_t *) draw->pt.user.vbuffer[i].map + draw->pt.vertex_buffer[i].buffer_offset), - draw->pt.vertex_buffer[i].stride, + draw->pt.vertex_strides[i], draw->pt.max_index); } diff --git a/src/gallium/auxiliary/driver_trace/tr_dump_state.c b/src/gallium/auxiliary/driver_trace/tr_dump_state.c index fe48ab89076..676e8e5c731 100644 
--- a/src/gallium/auxiliary/driver_trace/tr_dump_state.c +++ b/src/gallium/auxiliary/driver_trace/tr_dump_state.c @@ -701,7 +701,6 @@ void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state) trace_dump_struct_begin("pipe_vertex_buffer"); - trace_dump_member(uint, state, stride); trace_dump_member(bool, state, is_user_buffer); trace_dump_member(uint, state, buffer_offset); trace_dump_member(ptr, state, buffer.resource); @@ -731,6 +730,7 @@ void trace_dump_vertex_element(const struct pipe_vertex_element *state) trace_dump_member(bool, state, dual_slot); trace_dump_member(format, state, src_format); + trace_dump_member(uint, state, src_stride); trace_dump_struct_end(); } diff --git a/src/gallium/auxiliary/hud/hud_context.c b/src/gallium/auxiliary/hud/hud_context.c index 68118eadf36..58a217d3ecb 100644 --- a/src/gallium/auxiliary/hud/hud_context.c +++ b/src/gallium/auxiliary/hud/hud_context.c @@ -111,7 +111,6 @@ hud_draw_colored_prims(struct hud_context *hud, unsigned prim, num_vertices * 2 * sizeof(float), 16, buffer, &vbuffer.buffer_offset, &vbuffer.buffer.resource); u_upload_unmap(hud->pipe->stream_uploader); - vbuffer.stride = 2 * sizeof(float); cso_set_vertex_buffers(cso, 1, 0, false, &vbuffer); pipe_resource_reference(&vbuffer.buffer.resource, NULL); @@ -474,7 +473,6 @@ hud_prepare_vertices(struct hud_context *hud, struct vertex_queue *v, { v->num_vertices = 0; v->max_num_vertices = num_vertices; - v->vbuf.stride = stride; v->buffer_size = stride * num_vertices; } @@ -611,9 +609,11 @@ hud_draw_results(struct hud_context *hud, struct pipe_resource *tex) /* draw accumulated vertices for text */ if (hud->text.num_vertices) { cso_set_vertex_shader_handle(cso, hud->vs_text); + cso_set_vertex_elements(cso, &hud->text_velems); cso_set_vertex_buffers(cso, 1, 0, false, &hud->text.vbuf); cso_set_fragment_shader_handle(hud->cso, hud->fs_text); cso_draw_arrays(cso, MESA_PRIM_QUADS, 0, hud->text.num_vertices); + cso_set_vertex_elements(cso, &hud->velems); } 
pipe_resource_reference(&hud->text.vbuf.buffer.resource, NULL); @@ -1873,6 +1873,18 @@ hud_set_record_context(struct hud_context *hud, struct pipe_context *pipe) hud->record_pipe = pipe; } +static void +hud_init_velems(struct cso_velems_state *velems, unsigned stride) +{ + velems->count = 2; + for (unsigned i = 0; i < 2; i++) { + velems->velems[i].src_offset = i * 2 * sizeof(float); + velems->velems[i].src_format = PIPE_FORMAT_R32G32_FLOAT; + velems->velems[i].vertex_buffer_index = 0; + velems->velems[i].src_stride = stride; + } +} + /** * Create the HUD. * @@ -2007,12 +2019,8 @@ hud_create(struct cso_context *cso, struct hud_context *share, hud->rasterizer_aa_lines.line_smooth = 1; /* vertex elements */ - hud->velems.count = 2; - for (i = 0; i < 2; i++) { - hud->velems.velems[i].src_offset = i * 2 * sizeof(float); - hud->velems.velems[i].src_format = PIPE_FORMAT_R32G32_FLOAT; - hud->velems.velems[i].vertex_buffer_index = 0; - } + hud_init_velems(&hud->velems, 2 * sizeof(float)); + hud_init_velems(&hud->text_velems, 4 * sizeof(float)); /* sampler state (for font drawing) */ hud->font_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; diff --git a/src/gallium/auxiliary/hud/hud_private.h b/src/gallium/auxiliary/hud/hud_private.h index e4c05c929f5..28bef37b476 100644 --- a/src/gallium/auxiliary/hud/hud_private.h +++ b/src/gallium/auxiliary/hud/hud_private.h @@ -69,6 +69,7 @@ struct hud_context { struct pipe_rasterizer_state rasterizer, rasterizer_aa_lines; void *vs_color, *vs_text; struct cso_velems_state velems; + struct cso_velems_state text_velems; /* font */ struct util_font font; diff --git a/src/gallium/auxiliary/postprocess/pp_program.c b/src/gallium/auxiliary/postprocess/pp_program.c index fd7a9e6124e..db26b850805 100644 --- a/src/gallium/auxiliary/postprocess/pp_program.c +++ b/src/gallium/auxiliary/postprocess/pp_program.c @@ -115,10 +115,12 @@ pp_init_prog(struct pp_queue_t *ppq, struct pipe_context *pipe, p->velem.velems[0].instance_divisor = 0; 
p->velem.velems[0].vertex_buffer_index = 0; p->velem.velems[0].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + p->velem.velems[0].src_stride = 2 * 4 * sizeof(float); p->velem.velems[1].src_offset = 1 * 4 * sizeof(float); p->velem.velems[1].instance_divisor = 0; p->velem.velems[1].vertex_buffer_index = 0; p->velem.velems[1].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + p->velem.velems[1].src_stride = 2 * 4 * sizeof(float); if (!p->screen->is_format_supported(p->screen, PIPE_FORMAT_R32G32B32A32_FLOAT, diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index 860e6ce0a3f..f4a0a2f898b 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -320,6 +320,7 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) memset(&velem[0], 0, sizeof(velem[0]) * 2); for (i = 0; i < 2; i++) { velem[i].src_offset = i * 4 * sizeof(float); + velem[i].src_stride = 8 * sizeof(float); velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; velem[i].vertex_buffer_index = 0; } @@ -336,6 +337,7 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe) for (i = 0; i < 4; i++) { velem[0].src_format = formats[i]; velem[0].vertex_buffer_index = 0; + velem[0].src_stride = 0; ctx->velem_state_readbuf[i] = pipe->create_vertex_elements_state(pipe, 1, &velem[0]); } @@ -1401,8 +1403,6 @@ static void blitter_draw(struct blitter_context_priv *ctx, blitter_set_rectangle(ctx, x1, y1, x2, y2, depth); - vb.stride = 8 * sizeof(float); - u_upload_data(pipe->stream_uploader, 0, sizeof(ctx->vertices), 4, ctx->vertices, &vb.buffer_offset, &vb.buffer.resource); if (!vb.buffer.resource) @@ -2627,8 +2627,6 @@ void util_blitter_clear_buffer(struct blitter_context *blitter, if (!vb.buffer.resource) goto out; - vb.stride = 0; - util_blitter_set_running_flag(blitter); blitter_check_saved_vertex_states(ctx); blitter_disable_render_cond(ctx); diff --git a/src/gallium/auxiliary/util/u_draw.c 
b/src/gallium/auxiliary/util/u_draw.c index cd120405ea5..683c9c1520a 100644 --- a/src/gallium/auxiliary/util/u_draw.c +++ b/src/gallium/auxiliary/util/u_draw.c @@ -97,10 +97,10 @@ util_draw_max_index( buffer_size -= format_size; - if (buffer->stride != 0) { + if (element->src_stride != 0) { unsigned buffer_max_index; - buffer_max_index = buffer_size / buffer->stride; + buffer_max_index = buffer_size / element->src_stride; if (element->instance_divisor == 0) { /* Per-vertex data */ diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c index 72c4fa2f757..b4063183a8c 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.c +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -54,7 +54,6 @@ util_draw_vertex_buffer(struct pipe_context *pipe, /* tell pipe about the vertex buffer */ memset(&vbuffer, 0, sizeof(vbuffer)); vbuffer.buffer.resource = vbuf; - vbuffer.stride = num_attribs * 4 * sizeof(float); /* vertex size */ vbuffer.buffer_offset = offset; /* note: vertex elements already set by caller */ @@ -84,7 +83,6 @@ util_draw_user_vertex_buffer(struct cso_context *cso, void *buffer, vbuffer.is_user_buffer = true; vbuffer.buffer.user = buffer; - vbuffer.stride = num_attribs * 4 * sizeof(float); /* vertex size */ /* note: vertex elements already set by caller */ diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c index c46a2b2eb4d..9ada6bfc747 100644 --- a/src/gallium/auxiliary/util/u_dump_state.c +++ b/src/gallium/auxiliary/util/u_dump_state.c @@ -858,7 +858,6 @@ util_dump_vertex_buffer(FILE *stream, const struct pipe_vertex_buffer *state) util_dump_struct_begin(stream, "pipe_vertex_buffer"); - util_dump_member(stream, uint, state, stride); util_dump_member(stream, bool, state, is_user_buffer); util_dump_member(stream, uint, state, buffer_offset); util_dump_member(stream, ptr, state, buffer.resource); @@ -881,6 +880,7 @@ util_dump_vertex_element(FILE *stream, const struct 
pipe_vertex_element *state) util_dump_member(stream, uint, state, instance_divisor); util_dump_member(stream, uint, state, vertex_buffer_index); util_dump_member(stream, format, state, src_format); + util_dump_member(stream, uint, state, src_stride); util_dump_struct_end(stream); } diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h index 439e57827b7..6bccc629bf0 100644 --- a/src/gallium/auxiliary/util/u_inlines.h +++ b/src/gallium/auxiliary/util/u_inlines.h @@ -257,7 +257,6 @@ pipe_vertex_buffer_reference(struct pipe_vertex_buffer *dst, { if (dst->buffer.resource == src->buffer.resource) { /* Just copy the fields, don't touch reference counts. */ - dst->stride = src->stride; dst->is_user_buffer = src->is_user_buffer; dst->buffer_offset = src->buffer_offset; return; @@ -267,7 +266,6 @@ pipe_vertex_buffer_reference(struct pipe_vertex_buffer *dst, /* Don't use memcpy because there is a hole between variables. * dst can be used as a hash key. */ - dst->stride = src->stride; dst->is_user_buffer = src->is_user_buffer; dst->buffer_offset = src->buffer_offset; diff --git a/src/gallium/auxiliary/util/u_tests.c b/src/gallium/auxiliary/util/u_tests.c index 549b184b31a..351e1fd9643 100644 --- a/src/gallium/auxiliary/util/u_tests.c +++ b/src/gallium/auxiliary/util/u_tests.c @@ -146,6 +146,7 @@ util_set_interleaved_vertex_elements(struct cso_context *cso, for (i = 0; i < num_elements; i++) { velem.velems[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; velem.velems[i].src_offset = i * 16; + velem.velems[i].src_stride = num_elements * 4 * sizeof(float); } cso_set_vertex_elements(cso, &velem); diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c index 19b4fea0b2c..a84ce7c41e3 100644 --- a/src/gallium/auxiliary/util/u_threaded_context.c +++ b/src/gallium/auxiliary/util/u_threaded_context.c @@ -2101,7 +2101,6 @@ tc_set_vertex_buffers(struct pipe_context *_pipe, struct pipe_resource 
*buf = src->buffer.resource; tc_assert(!src->is_user_buffer); - dst->stride = src->stride; dst->is_user_buffer = false; tc_set_resource_reference(&dst->buffer.resource, buf); dst->buffer_offset = src->buffer_offset; diff --git a/src/gallium/auxiliary/util/u_vbuf.c b/src/gallium/auxiliary/util/u_vbuf.c index 9f0e77f446d..63e400e5b25 100644 --- a/src/gallium/auxiliary/util/u_vbuf.c +++ b/src/gallium/auxiliary/util/u_vbuf.c @@ -113,6 +113,8 @@ struct u_vbuf_elements { enum pipe_format native_format[PIPE_MAX_ATTRIBS]; unsigned native_format_size[PIPE_MAX_ATTRIBS]; unsigned component_size[PIPE_MAX_ATTRIBS]; + /* buffer-indexed */ + unsigned strides[PIPE_MAX_ATTRIBS]; /* Which buffers are used by the vertex element state. */ uint32_t used_vb_mask; @@ -139,6 +141,12 @@ struct u_vbuf_elements { /* Which buffers are used by multiple vertex attribs. */ uint32_t interleaved_vb_mask; + /* Which buffer has a non-zero stride. */ + uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp. buffer */ + + /* Which buffer is incompatible (unaligned). */ + uint32_t incompatible_vb_mask; /* each bit describes a corresp. buffer */ + void *driver_cso; }; @@ -190,8 +198,6 @@ struct u_vbuf { uint32_t user_vb_mask; /* each bit describes a corresp. buffer */ /* Which buffer is incompatible (unaligned). */ uint32_t incompatible_vb_mask; /* each bit describes a corresp. buffer */ - /* Which buffer has a non-zero stride. */ - uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp. buffer */ /* Which buffers are allowed (supported by hardware). 
*/ uint32_t allowed_vb_mask; }; @@ -479,14 +485,15 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, unsigned offset; uint8_t *map; unsigned i = u_bit_scan(&mask); + unsigned stride = mgr->ve->strides[i]; vb = &mgr->vertex_buffer[i]; - offset = vb->buffer_offset + vb->stride * start_vertex; + offset = vb->buffer_offset + stride * start_vertex; if (vb->is_user_buffer) { map = (uint8_t*)vb->buffer.user + offset; } else { - unsigned size = vb->stride ? num_vertices * vb->stride + unsigned size = stride ? num_vertices * stride : sizeof(double)*4; if (!vb->buffer.resource) { @@ -495,7 +502,7 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, continue; } - if (vb->stride) { + if (stride) { /* the stride cannot be used to calculate the map size of the buffer, * as it only determines the bytes between elements, not the size of elements * themselves, meaning that if stride < element_size, the mapped size will @@ -504,7 +511,7 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, * instead, add the size of the largest possible attribute to the final attribute's offset * in order to ensure the map is large enough */ - unsigned last_offset = size - vb->stride; + unsigned last_offset = size - stride; size = MAX2(size, last_offset + sizeof(double)*4); } @@ -523,7 +530,7 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, * crashing (by reading past the end of a hardware buffer mapping) * when people do that. */ - num_vertices = (size + vb->stride - 1) / vb->stride; + num_vertices = (size + stride - 1) / stride; } map = pipe_buffer_map_range(mgr->pipe, vb->buffer.resource, offset, size, @@ -532,10 +539,10 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, /* Subtract min_index so that indexing with the index buffer works. 
*/ if (unroll_indices) { - map -= (ptrdiff_t)vb->stride * min_index; + map -= (ptrdiff_t)stride * min_index; } - tr->set_buffer(tr, i, map, vb->stride, info->max_index); + tr->set_buffer(tr, i, map, stride, info->max_index); } /* Translate. */ @@ -603,7 +610,6 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, /* Setup the new vertex buffer. */ mgr->real_vertex_buffer[out_vb].buffer_offset = out_offset; - mgr->real_vertex_buffer[out_vb].stride = key->output_stride; /* Move the buffer reference. */ pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[out_vb]); @@ -621,7 +627,7 @@ u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr, unsigned fallback_vbs[VB_NUM]; /* Set the bit for each buffer which is incompatible, or isn't set. */ uint32_t unused_vb_mask = - mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask | + mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask | mgr->ve->incompatible_vb_mask | ~mgr->enabled_vb_mask; uint32_t unused_vb_mask_orig; bool insufficient_buffers = false; @@ -686,7 +692,7 @@ u_vbuf_translate_begin(struct u_vbuf *mgr, struct translate_key key[VB_NUM]; unsigned elem_index[VB_NUM][PIPE_MAX_ATTRIBS]; /* ... 
into key.elements */ unsigned i, type; - const unsigned incompatible_vb_mask = (misaligned | mgr->incompatible_vb_mask) & + const unsigned incompatible_vb_mask = (misaligned | mgr->incompatible_vb_mask | mgr->ve->incompatible_vb_mask) & mgr->ve->used_vb_mask; const int start[VB_NUM] = { @@ -709,7 +715,7 @@ u_vbuf_translate_begin(struct u_vbuf *mgr, for (i = 0; i < mgr->ve->count; i++) { unsigned vb_index = mgr->ve->ve[i].vertex_buffer_index; - if (!mgr->vertex_buffer[vb_index].stride) { + if (!mgr->ve->ve[i].src_stride) { if (!(mgr->ve->incompatible_elem_mask & (1 << i)) && !(incompatible_vb_mask & (1 << vb_index))) { continue; @@ -802,11 +808,6 @@ u_vbuf_translate_begin(struct u_vbuf *mgr, unroll_indices && type == VB_VERTEX); if (err != PIPE_OK) return false; - - /* Fixup the stride for constant attribs. */ - if (type == VB_CONST) { - mgr->real_vertex_buffer[mgr->fallback_vbs[VB_CONST]].stride = 0; - } } } @@ -820,6 +821,12 @@ u_vbuf_translate_begin(struct u_vbuf *mgr, mgr->fallback_velems.velems[i].src_offset = te->output_offset; mgr->fallback_velems.velems[i].vertex_buffer_index = mgr->fallback_vbs[type]; + /* Fixup the stride for constant attribs. */ + if (type == VB_CONST) + mgr->fallback_velems.velems[i].src_stride = 0; + else + mgr->fallback_velems.velems[i].src_stride = key[type].output_stride; + /* elem_index[type][i] can only be set for one type. 
*/ assert(type > VB_INSTANCE || elem_index[type+1][i] == ~0u); assert(type > VB_VERTEX || elem_index[type+2][i] == ~0u); @@ -920,11 +927,23 @@ u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count, ve->incompatible_vb_mask_any |= vb_index_bit; } else { ve->compatible_vb_mask_any |= vb_index_bit; - if (component_size == 2) + if (component_size == 2) { ve->vb_align_mask[0] |= vb_index_bit; - else if (component_size == 4) + if (ve->ve[i].src_stride % 2 != 0) + ve->incompatible_vb_mask |= vb_index_bit; + } + else if (component_size == 4) { ve->vb_align_mask[1] |= vb_index_bit; + if (ve->ve[i].src_stride % 4 != 0) + ve->incompatible_vb_mask |= vb_index_bit; + } } + ve->strides[ve->ve[i].vertex_buffer_index] = ve->ve[i].src_stride; + if (ve->ve[i].src_stride) { + ve->nonzero_stride_vb_mask |= 1 << ve->ve[i].vertex_buffer_index; + } + if (!mgr->caps.buffer_stride_unaligned && ve->ve[i].src_stride % 4 != 0) + ve->incompatible_vb_mask |= vb_index_bit; } if (used_buffers & ~mgr->allowed_vb_mask) { @@ -985,8 +1004,6 @@ void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, uint32_t user_vb_mask = 0; /* which buffers are incompatible with the driver */ uint32_t incompatible_vb_mask = 0; - /* which buffers have a non-zero stride */ - uint32_t nonzero_stride_vb_mask = 0; /* which buffers are unaligned to 2/4 bytes */ uint32_t unaligned_vb_mask[2] = {0}; uint32_t mask = ~BITFIELD64_MASK(count + unbind_num_trailing_slots); @@ -1000,7 +1017,6 @@ void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, /* Zero out the bits we are going to rewrite completely. 
*/ mgr->user_vb_mask &= mask; mgr->incompatible_vb_mask &= mask; - mgr->nonzero_stride_vb_mask &= mask; mgr->enabled_vb_mask &= mask; mgr->unaligned_vb_mask[0] &= mask; mgr->unaligned_vb_mask[1] &= mask; @@ -1030,7 +1046,7 @@ void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, bool not_user = !vb->is_user_buffer && vb->is_user_buffer == orig_vb->is_user_buffer; /* struct isn't tightly packed: do not use memcmp */ - if (not_user && orig_vb->stride == vb->stride && + if (not_user && orig_vb->buffer_offset == vb->buffer_offset && orig_vb->buffer.resource == vb->buffer.resource) { mask |= BITFIELD_BIT(dst_index); if (take_ownership) { @@ -1050,32 +1066,26 @@ void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, pipe_vertex_buffer_reference(orig_vb, vb); } - if (vb->stride) { - nonzero_stride_vb_mask |= 1 << dst_index; - } enabled_vb_mask |= 1 << dst_index; - if ((!mgr->caps.buffer_offset_unaligned && vb->buffer_offset % 4 != 0) || - (!mgr->caps.buffer_stride_unaligned && vb->stride % 4 != 0)) { + if ((!mgr->caps.buffer_offset_unaligned && vb->buffer_offset % 4 != 0)) { incompatible_vb_mask |= 1 << dst_index; real_vb->buffer_offset = vb->buffer_offset; - real_vb->stride = vb->stride; pipe_vertex_buffer_unreference(real_vb); real_vb->is_user_buffer = false; continue; } if (!mgr->caps.attrib_component_unaligned) { - if (vb->buffer_offset % 2 != 0 || vb->stride % 2 != 0) + if (vb->buffer_offset % 2 != 0) unaligned_vb_mask[0] |= BITFIELD_BIT(dst_index); - if (vb->buffer_offset % 4 != 0 || vb->stride % 4 != 0) + if (vb->buffer_offset % 4 != 0) unaligned_vb_mask[1] |= BITFIELD_BIT(dst_index); } if (!mgr->caps.user_vertex_buffers && vb->is_user_buffer) { user_vb_mask |= 1 << dst_index; real_vb->buffer_offset = vb->buffer_offset; - real_vb->stride = vb->stride; pipe_vertex_buffer_unreference(real_vb); real_vb->is_user_buffer = false; continue; @@ -1095,14 +1105,12 @@ void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr, /* Zero out the bits we are going to rewrite completely. 
*/ mgr->user_vb_mask &= mask; mgr->incompatible_vb_mask &= mask; - mgr->nonzero_stride_vb_mask &= mask; mgr->enabled_vb_mask &= mask; mgr->unaligned_vb_mask[0] &= mask; mgr->unaligned_vb_mask[1] &= mask; mgr->user_vb_mask |= user_vb_mask; mgr->incompatible_vb_mask |= incompatible_vb_mask; - mgr->nonzero_stride_vb_mask |= nonzero_stride_vb_mask; mgr->enabled_vb_mask |= enabled_vb_mask; mgr->unaligned_vb_mask[0] |= unaligned_vb_mask[0]; mgr->unaligned_vb_mask[1] |= unaligned_vb_mask[1]; @@ -1129,7 +1137,7 @@ get_upload_offset_size(struct u_vbuf *mgr, unsigned instance_div = velem->instance_divisor; *offset = vb->buffer_offset + velem->src_offset; - if (!vb->stride) { + if (!velem->src_stride) { /* Constant attrib. */ *size = ve->src_format_size[velem_index]; } else if (instance_div) { @@ -1144,12 +1152,12 @@ get_upload_offset_size(struct u_vbuf *mgr, if (count * instance_div != num_instances) count++; - *offset += vb->stride * start_instance; - *size = vb->stride * (count - 1) + ve->src_format_size[velem_index]; + *offset += velem->src_stride * start_instance; + *size = velem->src_stride * (count - 1) + ve->src_format_size[velem_index]; } else { /* Per-vertex attrib. */ - *offset += vb->stride * start_vertex; - *size = vb->stride * (num_vertices - 1) + ve->src_format_size[velem_index]; + *offset += velem->src_stride * start_vertex; + *size = velem->src_stride * (num_vertices - 1) + ve->src_format_size[velem_index]; } return true; } @@ -1262,11 +1270,11 @@ static bool u_vbuf_need_minmax_index(const struct u_vbuf *mgr, uint32_t misalign * elements. 
*/ return (mgr->ve->used_vb_mask & ((mgr->user_vb_mask | - mgr->incompatible_vb_mask | + mgr->incompatible_vb_mask | mgr->ve->incompatible_vb_mask | misaligned | mgr->ve->incompatible_vb_mask_any) & mgr->ve->noninstance_vb_mask_any & - mgr->nonzero_stride_vb_mask)) != 0; + mgr->ve->nonzero_stride_vb_mask)) != 0; } static bool u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr, uint32_t misaligned) @@ -1278,10 +1286,11 @@ static bool u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr, uint32 return (mgr->ve->used_vb_mask & (~mgr->user_vb_mask & ~mgr->incompatible_vb_mask & + ~mgr->ve->incompatible_vb_mask & ~misaligned & mgr->ve->compatible_vb_mask_all & mgr->ve->noninstance_vb_mask_any & - mgr->nonzero_stride_vb_mask)) != 0; + mgr->ve->nonzero_stride_vb_mask)) != 0; } static void @@ -1469,7 +1478,7 @@ void u_vbuf_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *inf } } const uint32_t incompatible_vb_mask = - (mgr->incompatible_vb_mask | misaligned) & used_vb_mask; + (mgr->incompatible_vb_mask | mgr->ve->incompatible_vb_mask | misaligned) & used_vb_mask; /* Normal draw. No fallback and no user buffers. 
*/ if (!incompatible_vb_mask && @@ -1695,7 +1704,7 @@ void u_vbuf_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *inf util_is_vbo_upload_ratio_too_large(new_draw.count, num_vertices) && !u_vbuf_mapping_vertex_buffer_blocks(mgr, misaligned)) { unroll_indices = true; - user_vb_mask &= ~(mgr->nonzero_stride_vb_mask & + user_vb_mask &= ~(mgr->ve->nonzero_stride_vb_mask & mgr->ve->noninstance_vb_mask_any); } } else { diff --git a/src/gallium/auxiliary/util/u_vertex_state_cache.c b/src/gallium/auxiliary/util/u_vertex_state_cache.c index 112cd79b4ae..b57e74c535d 100644 --- a/src/gallium/auxiliary/util/u_vertex_state_cache.c +++ b/src/gallium/auxiliary/util/u_vertex_state_cache.c @@ -75,7 +75,6 @@ util_vertex_state_cache_get(struct pipe_screen *screen, memset(&key, 0, sizeof(key)); key.input.indexbuf = indexbuf; - key.input.vbuffer.stride = buffer->stride; assert(!buffer->is_user_buffer); key.input.vbuffer.buffer_offset = buffer->buffer_offset; key.input.vbuffer.buffer = buffer->buffer; diff --git a/src/gallium/auxiliary/vl/vl_bicubic_filter.c b/src/gallium/auxiliary/vl/vl_bicubic_filter.c index 3281290be36..7ff833b1ad8 100644 --- a/src/gallium/auxiliary/vl/vl_bicubic_filter.c +++ b/src/gallium/auxiliary/vl/vl_bicubic_filter.c @@ -305,6 +305,7 @@ vl_bicubic_filter_init(struct vl_bicubic_filter *filter, struct pipe_context *pi ve.instance_divisor = 0; ve.vertex_buffer_index = 0; ve.src_format = PIPE_FORMAT_R32G32_FLOAT; + ve.src_stride = sizeof(struct vertex2f); filter->ves = pipe->create_vertex_elements_state(pipe, 1, &ve); if (!filter->ves) goto error_ves; diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c index 108c92c8bdb..894df6dc295 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -243,21 +243,23 @@ init_buffers(struct vl_compositor *c) /* * Create our vertex buffer and vertex buffer elements */ - c->vertex_buf.stride = sizeof(struct vertex2f) + 
sizeof(struct vertex4f) * 2; c->vertex_buf.buffer_offset = 0; c->vertex_buf.buffer.resource = NULL; c->vertex_buf.is_user_buffer = false; if (c->pipe_gfx_supported) { vertex_elems[0].src_offset = 0; + vertex_elems[0].src_stride = VL_COMPOSITOR_VB_STRIDE; vertex_elems[0].instance_divisor = 0; vertex_elems[0].vertex_buffer_index = 0; vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; vertex_elems[1].src_offset = sizeof(struct vertex2f); + vertex_elems[1].src_stride = VL_COMPOSITOR_VB_STRIDE; vertex_elems[1].instance_divisor = 0; vertex_elems[1].vertex_buffer_index = 0; vertex_elems[1].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; vertex_elems[2].src_offset = sizeof(struct vertex2f) + sizeof(struct vertex4f); + vertex_elems[2].src_stride = VL_COMPOSITOR_VB_STRIDE; vertex_elems[2].instance_divisor = 0; vertex_elems[2].vertex_buffer_index = 0; vertex_elems[2].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h index 3d55c31f260..b3873ae8d5c 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.h +++ b/src/gallium/auxiliary/vl/vl_compositor.h @@ -47,6 +47,8 @@ struct pipe_context; #define VL_COMPOSITOR_MIN_DIRTY (0) #define VL_COMPOSITOR_MAX_DIRTY (1 << 15) +#define VL_COMPOSITOR_VB_STRIDE (sizeof(struct vertex2f) + sizeof(struct vertex4f) * 2) + /* deinterlace allgorithem */ enum vl_compositor_deinterlace { diff --git a/src/gallium/auxiliary/vl/vl_compositor_gfx.c b/src/gallium/auxiliary/vl/vl_compositor_gfx.c index 5a0b7b15f4b..7244aeb7a1d 100644 --- a/src/gallium/auxiliary/vl/vl_compositor_gfx.c +++ b/src/gallium/auxiliary/vl/vl_compositor_gfx.c @@ -608,7 +608,7 @@ gen_vertex_data(struct vl_compositor *c, struct vl_compositor_state *s, struct u /* Allocate new memory for vertices.
*/ u_upload_alloc(c->pipe->stream_uploader, 0, - c->vertex_buf.stride * VL_COMPOSITOR_MAX_LAYERS * 4, /* size */ + VL_COMPOSITOR_VB_STRIDE * VL_COMPOSITOR_MAX_LAYERS * 4, /* size */ 4, /* alignment */ &c->vertex_buf.buffer_offset, &c->vertex_buf.buffer.resource, (void **)&vb); diff --git a/src/gallium/auxiliary/vl/vl_matrix_filter.c b/src/gallium/auxiliary/vl/vl_matrix_filter.c index db360f67d76..988e19298b9 100644 --- a/src/gallium/auxiliary/vl/vl_matrix_filter.c +++ b/src/gallium/auxiliary/vl/vl_matrix_filter.c @@ -193,6 +193,7 @@ vl_matrix_filter_init(struct vl_matrix_filter *filter, struct pipe_context *pipe goto error_quad; memset(&ve, 0, sizeof(ve)); + ve.src_stride = sizeof(struct vertex2f); ve.src_offset = 0; ve.instance_divisor = 0; ve.vertex_buffer_index = 0; diff --git a/src/gallium/auxiliary/vl/vl_median_filter.c b/src/gallium/auxiliary/vl/vl_median_filter.c index f2968ed881f..b9f037ed4ef 100644 --- a/src/gallium/auxiliary/vl/vl_median_filter.c +++ b/src/gallium/auxiliary/vl/vl_median_filter.c @@ -326,6 +326,7 @@ vl_median_filter_init(struct vl_median_filter *filter, struct pipe_context *pipe ve.instance_divisor = 0; ve.vertex_buffer_index = 0; ve.src_format = PIPE_FORMAT_R32G32_FLOAT; + ve.src_stride = sizeof(struct vertex2f); filter->ves = pipe->create_vertex_elements_state(pipe, 1, &ve); if (!filter->ves) goto error_ves; diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c index be02b82734a..69221987d31 100644 --- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c +++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c @@ -47,7 +47,6 @@ vl_vb_upload_quads(struct pipe_context *pipe) assert(pipe); /* create buffer */ - quad.stride = sizeof(struct vertex2f); quad.buffer_offset = 0; quad.buffer.resource = pipe_buffer_create ( @@ -92,7 +91,6 @@ vl_vb_upload_pos(struct pipe_context *pipe, unsigned width, unsigned height) assert(pipe); /* create buffer */ - pos.stride = sizeof(struct vertex2s); pos.buffer_offset = 
0; pos.buffer.resource = pipe_buffer_create ( @@ -167,14 +165,17 @@ vl_vb_get_ves_ycbcr(struct pipe_context *pipe) memset(&vertex_elems, 0, sizeof(vertex_elems)); vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element(); + vertex_elems[VS_I_RECT].src_stride = sizeof(struct vertex2f); /* Position element */ vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R8G8B8A8_USCALED; /* block num element */ vertex_elems[VS_I_BLOCK_NUM].src_format = PIPE_FORMAT_R32_FLOAT; + vertex_elems[VS_I_BLOCK_NUM].src_stride = sizeof(struct vertex2f); vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 2, 1); + vertex_elems[VS_I_VPOS].src_stride = sizeof(struct vl_ycbcr_block); return pipe->create_vertex_elements_state(pipe, 3, vertex_elems); } @@ -188,19 +189,24 @@ vl_vb_get_ves_mv(struct pipe_context *pipe) memset(&vertex_elems, 0, sizeof(vertex_elems)); vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element(); + vertex_elems[VS_I_RECT].src_stride = sizeof(struct vertex2f); /* Position element */ vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R16G16_SSCALED; vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 1, 1); + vertex_elems[VS_I_VPOS].src_stride = sizeof(struct vertex2s); /* motion vector TOP element */ vertex_elems[VS_I_MV_TOP].src_format = PIPE_FORMAT_R16G16B16A16_SSCALED; + vertex_elems[VS_I_MV_TOP].src_stride = sizeof(struct vertex2f); /* motion vector BOTTOM element */ vertex_elems[VS_I_MV_BOTTOM].src_format = PIPE_FORMAT_R16G16B16A16_SSCALED; + vertex_elems[VS_I_MV_BOTTOM].src_stride = sizeof(struct vertex2f); vl_vb_element_helper(&vertex_elems[VS_I_MV_TOP], 2, 2); + vertex_elems[VS_I_MV_TOP].src_stride = sizeof(struct vl_motionvector); return pipe->create_vertex_elements_state(pipe, NUM_VS_INPUTS, vertex_elems); } @@ -267,7 +273,6 @@ vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer, int component) assert(buffer); - buf.stride = sizeof(struct vl_ycbcr_block); buf.buffer_offset = 0; buf.buffer.resource = buffer->ycbcr[component].resource; buf.is_user_buffer = false; @@ -282,7 +287,6 
@@ vl_vb_get_mv(struct vl_vertex_buffer *buffer, int motionvector) assert(buffer); - buf.stride = sizeof(struct vl_motionvector); buf.buffer_offset = 0; buf.buffer.resource = buffer->mv[motionvector].resource; buf.is_user_buffer = false; diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index aeaaf814290..04d3b3850c4 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -1791,9 +1791,9 @@ agx_create_shader_state(struct pipe_context *pctx, case PIPE_SHADER_VERTEX: { key.vs.vbuf.count = AGX_MAX_VBUFS; for (unsigned i = 0; i < AGX_MAX_VBUFS; ++i) { - key.vs.vbuf.strides[i] = 16; key.vs.vbuf.attributes[i] = (struct agx_attribute){ .buf = i, + .stride = 16, .format = PIPE_FORMAT_R32G32B32A32_FLOAT, }; } @@ -1904,10 +1904,6 @@ agx_update_vs(struct agx_context *ctx) memcpy(key.vbuf.attributes, ctx->attributes, sizeof(key.vbuf.attributes[0]) * AGX_MAX_ATTRIBS); - u_foreach_bit(i, ctx->vb_mask) { - key.vbuf.strides[i] = ctx->vertex_buffers[i].stride; - } - return agx_update_shader(ctx, &ctx->vs, PIPE_SHADER_VERTEX, (union asahi_shader_key *)&key); } diff --git a/src/gallium/drivers/crocus/crocus_state.c b/src/gallium/drivers/crocus/crocus_state.c index 29a695dfd24..408a1f69a00 100644 --- a/src/gallium/drivers/crocus/crocus_state.c +++ b/src/gallium/drivers/crocus/crocus_state.c @@ -3751,6 +3751,7 @@ struct crocus_vertex_element_state { #endif uint32_t step_rate[16]; uint8_t wa_flags[33]; + uint16_t strides[16]; unsigned count; }; @@ -3773,7 +3774,7 @@ crocus_create_vertex_elements(struct pipe_context *ctx, struct crocus_screen *screen = (struct crocus_screen *)ctx->screen; const struct intel_device_info *devinfo = &screen->devinfo; struct crocus_vertex_element_state *cso = - malloc(sizeof(struct crocus_vertex_element_state)); + calloc(1, sizeof(struct crocus_vertex_element_state)); cso->count = count; @@ -3835,6 +3836,7 @@ crocus_create_vertex_elements(struct pipe_context *ctx, #endif 
cso->step_rate[state[i].vertex_buffer_index] = state[i].instance_divisor; + cso->strides[state[i].vertex_buffer_index] = state[i].src_stride; switch (isl_format_get_num_channels(fmt.fmt)) { case 0: comp[0] = VFCOMP_STORE_0; FALLTHROUGH; @@ -7607,7 +7609,7 @@ crocus_upload_dirty_render_state(struct crocus_context *ice, emit_vertex_buffer_state(batch, i, bo, buf->buffer_offset, ice->state.vb_end[i], - buf->stride, + ice->state.cso_vertex_elements->strides[i], step_rate, &map); } diff --git a/src/gallium/drivers/d3d12/d3d12_context.cpp b/src/gallium/drivers/d3d12/d3d12_context.cpp index 17b180858e3..8e0a7753a53 100644 --- a/src/gallium/drivers/d3d12/d3d12_context.cpp +++ b/src/gallium/drivers/d3d12/d3d12_context.cpp @@ -136,6 +136,7 @@ d3d12_create_vertex_elements_state(struct pipe_context *pctx, if (!cso) return NULL; + unsigned max_vb = 0; for (unsigned i = 0; i < num_elements; ++i) { cso->elements[i].SemanticName = "TEXCOORD"; cso->elements[i].SemanticIndex = i; @@ -159,9 +160,12 @@ d3d12_create_vertex_elements_state(struct pipe_context *pctx, cso->elements[i].InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; cso->elements[i].InstanceDataStepRate = 0; } + max_vb = MAX2(max_vb, elements[i].vertex_buffer_index); + cso->strides[elements[i].vertex_buffer_index] = elements[i].src_stride; } cso->num_elements = num_elements; + cso->num_buffers = num_elements ? 
max_vb + 1 : 0; return cso; } @@ -1326,7 +1330,6 @@ d3d12_set_vertex_buffers(struct pipe_context *pctx, continue; struct d3d12_resource *res = d3d12_resource(buf->buffer.resource); ctx->vbvs[i].BufferLocation = d3d12_resource_gpu_virtual_address(res) + buf->buffer_offset; - ctx->vbvs[i].StrideInBytes = buf->stride; ctx->vbvs[i].SizeInBytes = res->base.b.width0 - buf->buffer_offset; } ctx->state_dirty |= D3D12_DIRTY_VERTEX_BUFFERS; diff --git a/src/gallium/drivers/d3d12/d3d12_draw.cpp b/src/gallium/drivers/d3d12/d3d12_draw.cpp index fb9e0aec245..489e1686c38 100644 --- a/src/gallium/drivers/d3d12/d3d12_draw.cpp +++ b/src/gallium/drivers/d3d12/d3d12_draw.cpp @@ -855,7 +855,7 @@ update_draw_auto(struct d3d12_context *ctx, auto so_arg = indirect_in->count_from_stream_output; d3d12_stream_output_target *target = (d3d12_stream_output_target *)so_arg; - ctx->transform_state_vars[0] = ctx->vbs[0].stride; + ctx->transform_state_vars[0] = ctx->gfx_pipeline_state.ves->strides[0]; ctx->transform_state_vars[1] = ctx->vbs[0].buffer_offset - so_arg->buffer_offset; pipe_shader_buffer new_cs_ssbo; @@ -1123,8 +1123,17 @@ d3d12_draw_vbo(struct pipe_context *pctx, d3d12_batch_reference_resource(batch, res, false); } } - if (ctx->cmdlist_dirty & D3D12_DIRTY_VERTEX_BUFFERS) + if (ctx->cmdlist_dirty & (D3D12_DIRTY_VERTEX_BUFFERS | D3D12_DIRTY_VERTEX_ELEMENTS)) { + uint16_t *strides = ctx->gfx_pipeline_state.ves ? 
ctx->gfx_pipeline_state.ves->strides : NULL; + if (strides) { + for (unsigned i = 0; i < ctx->num_vbs; i++) + ctx->vbvs[i].StrideInBytes = strides[i]; + } else { + for (unsigned i = 0; i < ctx->num_vbs; i++) + ctx->vbvs[i].StrideInBytes = 0; + } ctx->cmdlist->IASetVertexBuffers(0, ctx->num_vbs, ctx->vbvs); + } if (index_buffer) { D3D12_INDEX_BUFFER_VIEW ibv; diff --git a/src/gallium/drivers/d3d12/d3d12_pipeline_state.h b/src/gallium/drivers/d3d12/d3d12_pipeline_state.h index 14d9d60b6cb..0e31523ba2c 100644 --- a/src/gallium/drivers/d3d12/d3d12_pipeline_state.h +++ b/src/gallium/drivers/d3d12/d3d12_pipeline_state.h @@ -40,9 +40,11 @@ struct d3d12_root_signature; struct d3d12_vertex_elements_state { D3D12_INPUT_ELEMENT_DESC elements[PIPE_MAX_ATTRIBS]; enum pipe_format format_conversion[PIPE_MAX_ATTRIBS]; + uint16_t strides[PIPE_MAX_ATTRIBS]; unsigned num_elements:6; // <= PIPE_MAX_ATTRIBS + unsigned num_buffers:6; // <= PIPE_MAX_ATTRIBS unsigned needs_format_emulation:1; - unsigned unused:25; + unsigned unused:19; }; struct d3d12_rasterizer_state { diff --git a/src/gallium/drivers/etnaviv/etnaviv_emit.c b/src/gallium/drivers/etnaviv/etnaviv_emit.c index 3753a521324..08fdfdd38da 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_emit.c +++ b/src/gallium/drivers/etnaviv/etnaviv_emit.c @@ -336,29 +336,30 @@ etna_emit_state(struct etna_context *ctx) for (int x = 0; x < ctx->vertex_buffer.count; ++x) { /*14600*/ EMIT_STATE_RELOC(NFE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR); } - for (int x = 0; x < ctx->vertex_buffer.count; ++x) { - if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) { - /*14640*/ EMIT_STATE(NFE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL); - } - } } else if(screen->specs.stream_count > 1) { /* hw w/ multiple vertex streams */ for (int x = 0; x < ctx->vertex_buffer.count; ++x) { /*00680*/ EMIT_STATE_RELOC(FE_VERTEX_STREAMS_BASE_ADDR(x), 
&ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR); } - for (int x = 0; x < ctx->vertex_buffer.count; ++x) { - if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) { - /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_CONTROL); - } - } } else { /* hw w/ single vertex stream */ /*0064C*/ EMIT_STATE_RELOC(FE_VERTEX_STREAM_BASE_ADDR, &ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_BASE_ADDR); - /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL, ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_CONTROL); } } /* gallium has instance divisor as part of elements state */ - if ((dirty & (ETNA_DIRTY_VERTEX_ELEMENTS)) && screen->specs.halti >= 2) { + if (dirty & (ETNA_DIRTY_VERTEX_ELEMENTS)) { for (int x = 0; x < ctx->vertex_elements->num_buffers; ++x) { - /*14680*/ EMIT_STATE(NFE_VERTEX_STREAMS_VERTEX_DIVISOR(x), ctx->vertex_elements->NFE_VERTEX_STREAMS_VERTEX_DIVISOR[x]); + if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) { + if (screen->specs.halti >= 2) + /*14640*/ EMIT_STATE(NFE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_elements->FE_VERTEX_STREAM_CONTROL[x]); + else if (screen->specs.stream_count > 1) + /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_elements->FE_VERTEX_STREAM_CONTROL[x]); + else + /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL, ctx->vertex_elements->FE_VERTEX_STREAM_CONTROL[0]); + } + } + if (screen->specs.halti >= 2) { + for (int x = 0; x < ctx->vertex_elements->num_buffers; ++x) { + /*14680*/ EMIT_STATE(NFE_VERTEX_STREAMS_VERTEX_DIVISOR(x), ctx->vertex_elements->NFE_VERTEX_STREAMS_VERTEX_DIVISOR[x]); + } } } diff --git a/src/gallium/drivers/etnaviv/etnaviv_internal.h b/src/gallium/drivers/etnaviv/etnaviv_internal.h index 7115abced6e..fa549d35078 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_internal.h +++ b/src/gallium/drivers/etnaviv/etnaviv_internal.h @@ -219,11 +219,11 @@ struct compiled_vertex_elements_state { uint32_t NFE_GENERIC_ATTRIB_CONFIG1[VIVS_NFE_GENERIC_ATTRIB__LEN]; 
unsigned num_buffers; uint32_t NFE_VERTEX_STREAMS_VERTEX_DIVISOR[VIVS_NFE_VERTEX_STREAMS__LEN]; + uint32_t FE_VERTEX_STREAM_CONTROL[VIVS_NFE_VERTEX_STREAMS__LEN]; }; /* Compiled context->set_vertex_buffer result */ struct compiled_set_vertex_buffer { - uint32_t FE_VERTEX_STREAM_CONTROL; struct etna_reloc FE_VERTEX_STREAM_BASE_ADDR; }; diff --git a/src/gallium/drivers/etnaviv/etnaviv_state.c b/src/gallium/drivers/etnaviv/etnaviv_state.c index f38b7112d2d..8c8b86e754b 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_state.c +++ b/src/gallium/drivers/etnaviv/etnaviv_state.c @@ -473,11 +473,8 @@ etna_set_vertex_buffers(struct pipe_context *pctx, unsigned num_buffers, cs->FE_VERTEX_STREAM_BASE_ADDR.bo = etna_resource(vbi->buffer.resource)->bo; cs->FE_VERTEX_STREAM_BASE_ADDR.offset = vbi->buffer_offset; cs->FE_VERTEX_STREAM_BASE_ADDR.flags = ETNA_RELOC_READ; - cs->FE_VERTEX_STREAM_CONTROL = - FE_VERTEX_STREAM_CONTROL_VERTEX_STRIDE(vbi->stride); } else { cs->FE_VERTEX_STREAM_BASE_ADDR.bo = NULL; - cs->FE_VERTEX_STREAM_CONTROL = 0; } } @@ -605,6 +602,8 @@ etna_vertex_elements_state_create(struct pipe_context *pctx, COND(nonconsecutive, VIVS_NFE_GENERIC_ATTRIB_CONFIG1_NONCONSECUTIVE) | VIVS_NFE_GENERIC_ATTRIB_CONFIG1_END(end_offset - start_offset); } + cs->FE_VERTEX_STREAM_CONTROL[buffer_idx] = + FE_VERTEX_STREAM_CONTROL_VERTEX_STRIDE(elements[idx].src_stride); if (util_format_is_pure_integer(elements[idx].src_format)) cs->NFE_GENERIC_ATTRIB_SCALE[idx] = 1; diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_program.c b/src/gallium/drivers/freedreno/a2xx/fd2_program.c index feea306f328..d60ab8c6795 100644 --- a/src/gallium/drivers/freedreno/a2xx/fd2_program.c +++ b/src/gallium/drivers/freedreno/a2xx/fd2_program.c @@ -175,7 +175,7 @@ patch_vtx_fetch(struct fd_context *ctx, struct pipe_vertex_element *elem, instr->num_format_all = fmt.num_format; instr->format = fmt.format; instr->exp_adjust_all = fmt.exp_adjust; - instr->stride = 
ctx->vtx.vertexbuf.vb[elem->vertex_buffer_index].stride; + instr->stride = elem->src_stride; instr->offset = elem->src_offset; } diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 7dfcce3380d..32a2d6a69b0 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -428,7 +428,7 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit) OUT_PKT0(ring, REG_A3XX_VFD_FETCH(j), 2); OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) | - A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vb->stride) | + A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(elem->src_stride) | COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) | A3XX_VFD_FETCH_INSTR_0_INDEXCODE(j) | COND(elem->instance_divisor, diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index 739b1cfcbab..87f14f82289 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -573,7 +573,7 @@ fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit) OUT_PKT0(ring, REG_A4XX_VFD_FETCH(j), 4); OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) | - A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vb->stride) | + A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(elem->src_stride) | COND(elem->instance_divisor, A4XX_VFD_FETCH_INSTR_0_INSTANCED) | COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT)); diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c index ccfa34e0cbc..86307c21d9d 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c @@ -486,7 +486,7 @@ fd5_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd5_emit *emit) OUT_PKT4(ring, REG_A5XX_VFD_FETCH(j), 4); OUT_RELOC(ring, rsc->bo, off, 0, 0); OUT_RING(ring, size); /* VFD_FETCH[j].SIZE */ - OUT_RING(ring, vb->stride); /* VFD_FETCH[j].STRIDE */ + OUT_RING(ring, 
elem->src_stride); /* VFD_FETCH[j].STRIDE */ OUT_PKT4(ring, REG_A5XX_VFD_DECODE(j), 2); OUT_RING( diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.cc b/src/gallium/drivers/freedreno/a6xx/fd6_context.cc index 2f404c67ac4..8931b46074a 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_context.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.cc @@ -83,7 +83,7 @@ fd6_vertex_state_create(struct pipe_context *pctx, unsigned num_elements, memcpy(state->base.pipe, elements, sizeof(*elements) * num_elements); state->base.num_elements = num_elements; state->stateobj = - fd_ringbuffer_new_object(ctx->pipe, 4 * (num_elements * 2 + 1)); + fd_ringbuffer_new_object(ctx->pipe, 4 * (num_elements * 4 + 1)); struct fd_ringbuffer *ring = state->stateobj; OUT_PKT4(ring, REG_A6XX_VFD_DECODE(0), 2 * num_elements); @@ -106,6 +106,13 @@ fd6_vertex_state_create(struct pipe_context *pctx, unsigned num_elements, MAX2(1, elem->instance_divisor)); /* VFD_DECODE[j].STEP_RATE */ } + for (int32_t i = 0; i < num_elements; i++) { + const struct pipe_vertex_element *elem = &elements[i]; + + OUT_PKT4(ring, REG_A6XX_VFD_FETCH_STRIDE(elem->vertex_buffer_index), 1); + OUT_RING(ring, elem->src_stride); + } + return state; } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc b/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc index 0fff1bdf678..3ead08e2085 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.cc @@ -68,36 +68,26 @@ build_vbo_state(struct fd6_emit *emit) assert_dt { const struct fd_vertex_state *vtx = &emit->ctx->vtx; - /* Limit PKT4 size, because at max count (32) we would overflow the - * size of the PKT4 size field: - */ - const unsigned maxcnt = 16; const unsigned cnt = vtx->vertexbuf.count; - const unsigned dwords = (cnt * 4) /* per vbo: reg64 + two reg32 */ - + (1 + cnt / maxcnt); /* PKT4 hdr every 16 vbo's */ + const unsigned dwords = cnt * 4; /* per vbo: reg64 + one reg32 + pkt hdr */ struct 
fd_ringbuffer *ring = fd_submit_new_ringbuffer( emit->ctx->batch->submit, 4 * dwords, FD_RINGBUFFER_STREAMING); for (int32_t j = 0; j < cnt; j++) { - if ((j % maxcnt) == 0) { - unsigned sz = MIN2(maxcnt, cnt - j); - OUT_PKT4(ring, REG_A6XX_VFD_FETCH(j), 4 * sz); - } + OUT_PKT4(ring, REG_A6XX_VFD_FETCH(j), 3); const struct pipe_vertex_buffer *vb = &vtx->vertexbuf.vb[j]; struct fd_resource *rsc = fd_resource(vb->buffer.resource); if (rsc == NULL) { OUT_RING(ring, 0); OUT_RING(ring, 0); OUT_RING(ring, 0); - OUT_RING(ring, 0); } else { uint32_t off = vb->buffer_offset; uint32_t size = vb->buffer.resource->width0 - off; OUT_RELOC(ring, rsc->bo, off, 0, 0); OUT_RING(ring, size); /* VFD_FETCH[j].SIZE */ - OUT_RING(ring, vb->stride); /* VFD_FETCH[j].STRIDE */ } } diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c index 577ce4e5fee..36d8c921dfb 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.c +++ b/src/gallium/drivers/freedreno/freedreno_context.c @@ -588,9 +588,9 @@ fd_context_setup_common_vbos(struct fd_context *ctx) .vertex_buffer_index = 0, .src_offset = 0, .src_format = PIPE_FORMAT_R32G32B32_FLOAT, + .src_stride = 12, }}); ctx->solid_vbuf_state.vertexbuf.count = 1; - ctx->solid_vbuf_state.vertexbuf.vb[0].stride = 12; ctx->solid_vbuf_state.vertexbuf.vb[0].buffer.resource = ctx->solid_vbuf; /* setup blit_vbuf_state: */ @@ -601,17 +601,17 @@ fd_context_setup_common_vbos(struct fd_context *ctx) .vertex_buffer_index = 0, .src_offset = 0, .src_format = PIPE_FORMAT_R32G32_FLOAT, + .src_stride = 8, }, { .vertex_buffer_index = 1, .src_offset = 0, .src_format = PIPE_FORMAT_R32G32B32_FLOAT, + .src_stride = 12, }}); ctx->blit_vbuf_state.vertexbuf.count = 2; - ctx->blit_vbuf_state.vertexbuf.vb[0].stride = 8; ctx->blit_vbuf_state.vertexbuf.vb[0].buffer.resource = ctx->blit_texcoord_vbuf; - ctx->blit_vbuf_state.vertexbuf.vb[1].stride = 12; ctx->blit_vbuf_state.vertexbuf.vb[1].buffer.resource = 
ctx->solid_vbuf; } diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index 605304137fe..8cff6b8ad70 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -88,6 +88,7 @@ struct fd_vertexbuf_stateobj { struct fd_vertex_stateobj { struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS]; + unsigned strides[PIPE_MAX_ATTRIBS]; unsigned num_elements; }; diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c index 87068812a28..daa7934828b 100644 --- a/src/gallium/drivers/freedreno/freedreno_state.c +++ b/src/gallium/drivers/freedreno/freedreno_state.c @@ -468,9 +468,7 @@ fd_set_vertex_buffers(struct pipe_context *pctx, for (i = 0; i < count; i++) { bool new_enabled = vb && vb[i].buffer.resource; bool old_enabled = so->vb[i].buffer.resource != NULL; - uint32_t new_stride = vb ? vb[i].stride : 0; - uint32_t old_stride = so->vb[i].stride; - if ((new_enabled != old_enabled) || (new_stride != old_stride)) { + if (new_enabled != old_enabled) { fd_context_dirty(ctx, FD_DIRTY_VTXSTATE); break; } @@ -595,6 +593,8 @@ fd_vertex_state_create(struct pipe_context *pctx, unsigned num_elements, memcpy(so->pipe, elements, sizeof(*elements) * num_elements); so->num_elements = num_elements; + for (unsigned i = 0; i < num_elements; i++) + so->strides[elements[i].vertex_buffer_index] = elements[i].src_stride; return so; } diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 7748d880e7e..27a7e5dd8b4 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -380,6 +380,8 @@ emit_state(struct iris_batch *batch, #define cso_changed(x) (!old_cso || (old_cso->x != new_cso->x)) #define cso_changed_memcmp(x) \ (!old_cso || memcmp(old_cso->x, new_cso->x, sizeof(old_cso->x)) != 0) +#define cso_changed_memcmp_elts(x, n) \ + (!old_cso || 
memcmp(old_cso->x, new_cso->x, n * sizeof(old_cso->x[0])) != 0) static void flush_before_state_base_change(struct iris_batch *batch) @@ -3990,7 +3992,7 @@ iris_set_vertex_buffers(struct pipe_context *ctx, iris_pack_state(GENX(VERTEX_BUFFER_STATE), state->state, vb) { vb.VertexBufferIndex = i; vb.AddressModifyEnable = true; - vb.BufferPitch = buffer->stride; + /* vb.BufferPitch is merged in dynamically from VE state later */ if (res) { vb.BufferSize = res->base.b.width0 - (int) buffer->buffer_offset; vb.BufferStartingAddress = @@ -4026,6 +4028,8 @@ struct iris_vertex_element_state { uint32_t vf_instancing[33 * GENX(3DSTATE_VF_INSTANCING_length)]; uint32_t edgeflag_ve[GENX(VERTEX_ELEMENT_STATE_length)]; uint32_t edgeflag_vfi[GENX(3DSTATE_VF_INSTANCING_length)]; + uint32_t stride[PIPE_MAX_ATTRIBS]; + unsigned vb_count; unsigned count; }; @@ -4048,9 +4052,10 @@ iris_create_vertex_elements(struct pipe_context *ctx, struct iris_screen *screen = (struct iris_screen *)ctx->screen; const struct intel_device_info *devinfo = screen->devinfo; struct iris_vertex_element_state *cso = - malloc(sizeof(struct iris_vertex_element_state)); + calloc(1, sizeof(struct iris_vertex_element_state)); cso->count = count; + cso->vb_count = 0; iris_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), cso->vertex_elements, ve) { ve.DWordLength = @@ -4109,6 +4114,8 @@ iris_create_vertex_elements(struct pipe_context *ctx, ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length); vfi_pack_dest += GENX(3DSTATE_VF_INSTANCING_length); + cso->stride[state[i].vertex_buffer_index] = state[i].src_stride; + cso->vb_count = MAX2(state[i].vertex_buffer_index + 1, cso->vb_count); } /* An alternative version of the last VE and VFI is stored so it @@ -4159,6 +4166,12 @@ iris_bind_vertex_elements_state(struct pipe_context *ctx, void *state) ice->state.cso_vertex_elements = state; ice->state.dirty |= IRIS_DIRTY_VERTEX_ELEMENTS; + if (new_cso) { + /* re-emit vertex buffer state if stride changes */ + if (cso_changed(vb_count) || 
+ cso_changed_memcmp_elts(stride, new_cso->vb_count)) + ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS; + } } /** @@ -7369,11 +7382,25 @@ iris_upload_dirty_render_state(struct iris_context *ice, } map += 1; + const struct iris_vertex_element_state *cso_ve = + ice->state.cso_vertex_elements; + bound = dynamic_bound; while (bound) { const int i = u_bit_scan64(&bound); - memcpy(map, genx->vertex_buffers[i].state, - sizeof(uint32_t) * vb_dwords); + + uint32_t vb_stride[GENX(VERTEX_BUFFER_STATE_length)]; + struct iris_bo *bo = + iris_resource_bo(genx->vertex_buffers[i].resource); + iris_pack_state(GENX(VERTEX_BUFFER_STATE), &vb_stride, vbs) { + vbs.BufferPitch = cso_ve->stride[i]; + /* Unnecessary except to defeat the genxml nonzero checker */ + vbs.MOCS = iris_mocs(bo, &screen->isl_dev, + ISL_SURF_USAGE_VERTEX_BUFFER_BIT); + } + for (unsigned d = 0; d < vb_dwords; d++) + map[d] = genx->vertex_buffers[i].state[d] | vb_stride[d]; + map += vb_dwords; } } diff --git a/src/gallium/drivers/lima/lima_draw.c b/src/gallium/drivers/lima/lima_draw.c index 856b24aacd1..e64f1e88c1a 100644 --- a/src/gallium/drivers/lima/lima_draw.c +++ b/src/gallium/drivers/lima/lima_draw.c @@ -837,8 +837,8 @@ lima_update_gp_attribute_info(struct lima_context *ctx, const struct pipe_draw_i unsigned start = info->index_size ? 
(ctx->min_index + draw->index_bias) : draw->start; attribute[n++] = res->bo->va + pvb->buffer_offset + pve->src_offset - + start * pvb->stride; - attribute[n++] = (pvb->stride << 11) | + + start * pve->src_stride; + attribute[n++] = (pve->src_stride << 11) | (lima_pipe_format_to_attrib_type(pve->src_format) << 2) | (util_format_get_nr_components(pve->src_format) - 1); } diff --git a/src/gallium/drivers/nouveau/nv30/nv30_push.c b/src/gallium/drivers/nouveau/nv30/nv30_push.c index 6286850ea21..b796c470c61 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_push.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_push.c @@ -221,9 +221,9 @@ nv30_push_vbo(struct nv30_context *nv30, const struct pipe_draw_info *info, vb->buffer_offset, NOUVEAU_BO_RD); if (apply_bias) - data += draw->index_bias * vb->stride; + data += draw->index_bias * nv30->vertex->strides[i]; - ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0); + ctx.translate->set_buffer(ctx.translate, i, data, nv30->vertex->strides[i], ~0); } if (info->index_size) { diff --git a/src/gallium/drivers/nouveau/nv30/nv30_state.h b/src/gallium/drivers/nouveau/nv30/nv30_state.h index ed3b8103a00..594063dc8b5 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_state.h +++ b/src/gallium/drivers/nouveau/nv30/nv30_state.h @@ -137,6 +137,7 @@ struct nv30_vertex_stateobj { struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS]; struct translate *translate; bool need_conversion; + uint16_t strides[PIPE_MAX_ATTRIBS]; unsigned num_elements; unsigned vtx_size; unsigned vtx_per_packet_max; diff --git a/src/gallium/drivers/nouveau/nv30/nv30_vbo.c b/src/gallium/drivers/nouveau/nv30/nv30_vbo.c index a67363e096c..d609ea3b9f8 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_vbo.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_vbo.c @@ -85,9 +85,9 @@ nv30_vbuf_range(struct nv30_context *nv30, int vbi, uint32_t *base, uint32_t *size) { assert(nv30->vbo_max_index != ~0); - *base = nv30->vbo_min_index * nv30->vtxbuf[vbi].stride; + *base = 
nv30->vbo_min_index * nv30->vertex->strides[vbi]; *size = (nv30->vbo_max_index - - nv30->vbo_min_index + 1) * nv30->vtxbuf[vbi].stride; + nv30->vbo_min_index + 1) * nv30->vertex->strides[vbi]; } static void @@ -102,7 +102,7 @@ nv30_prevalidate_vbufs(struct nv30_context *nv30) for (i = 0; i < nv30->num_vtxbufs; i++) { vb = &nv30->vtxbuf[i]; - if (!vb->stride || !vb->buffer.resource) /* NOTE: user_buffer not implemented */ + if (!nv30->vertex->strides[i] || !vb->buffer.resource) /* NOTE: user_buffer not implemented */ continue; buf = nv04_resource(vb->buffer.resource); @@ -114,7 +114,7 @@ nv30_prevalidate_vbufs(struct nv30_context *nv30) } else { if (buf->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY) { nv30->vbo_user |= 1 << i; - assert(vb->stride > vb->buffer_offset); + assert(nv30->vertex->strides[i] > vb->buffer_offset); nv30_vbuf_range(nv30, i, &base, &size); nouveau_user_buffer_upload(&nv30->base, buf, base, size); } else { @@ -143,7 +143,7 @@ nv30_update_user_vbufs(struct nv30_context *nv30) if (!(nv30->vbo_user & (1 << b))) continue; - if (!vb->stride) { + if (!nv30->vertex->strides[b]) { nv30_emit_vtxattr(nv30, vb, ve, i); continue; } @@ -216,8 +216,8 @@ nv30_vbo_validate(struct nv30_context *nv30) ve = &vertex->pipe[i]; vb = &nv30->vtxbuf[ve->vertex_buffer_index]; - if (likely(vb->stride) || nv30->vbo_fifo) - PUSH_DATA (push, (vb->stride << 8) | vertex->element[i].state); + if (likely(vertex->strides[ve->vertex_buffer_index]) || nv30->vbo_fifo) + PUSH_DATA (push, (vertex->strides[ve->vertex_buffer_index] << 8) | vertex->element[i].state); else PUSH_DATA (push, NV30_3D_VTXFMT_TYPE_V32_FLOAT); } @@ -237,7 +237,7 @@ nv30_vbo_validate(struct nv30_context *nv30) res = nv04_resource(vb->buffer.resource); - if (nv30->vbo_fifo || unlikely(vb->stride == 0)) { + if (nv30->vbo_fifo || unlikely(ve->src_stride == 0)) { if (!nv30->vbo_fifo) nv30_emit_vtxattr(nv30, vb, ve, i); continue; @@ -306,6 +306,7 @@ nv30_vertex_state_create(struct pipe_context *pipe, unsigned
num_elements, transkey.element[j].output_offset = transkey.output_stride; transkey.output_stride += (util_format_get_stride(fmt, 1) + 3) & ~3; } + so->strides[vbi] = ve->src_stride; } so->translate = translate_create(&transkey); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_push.c b/src/gallium/drivers/nouveau/nv50/nv50_push.c index 7d315f74bc8..c4f293049aa 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_push.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_push.c @@ -276,9 +276,9 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info, data = vb->buffer.user; if (apply_bias && likely(!(nv50->vertex->instance_bufs & (1 << i)))) - data += (ptrdiff_t)(info->index_size ? draw->index_bias : 0) * vb->stride; + data += (ptrdiff_t)(info->index_size ? draw->index_bias : 0) * nv50->vertex->strides[i]; - ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0); + ctx.translate->set_buffer(ctx.translate, i, data, nv50->vertex->strides[i], ~0); } if (info->index_size) { diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c index 9cef689b0e4..3aff75c5702 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -1118,14 +1118,9 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, if (vb[i].is_user_buffer) { nv50->vbo_user |= 1 << dst_index; - if (!vb[i].stride) - nv50->vbo_constant |= 1 << dst_index; - else - nv50->vbo_constant &= ~(1 << dst_index); nv50->vtxbufs_coherent &= ~(1 << dst_index); } else { nv50->vbo_user &= ~(1 << dst_index); - nv50->vbo_constant &= ~(1 << dst_index); if (vb[i].buffer.resource && vb[i].buffer.resource->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h b/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h index 579da9a110c..3d4dc293826 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h @@ 
-52,10 +52,12 @@ struct nv50_vertex_element { struct nv50_vertex_stateobj { uint32_t min_instance_div[PIPE_MAX_ATTRIBS]; uint16_t vb_access_size[PIPE_MAX_ATTRIBS]; + uint16_t strides[PIPE_MAX_ATTRIBS]; struct translate *translate; unsigned num_elements; uint32_t instance_elts; uint32_t instance_bufs; + uint32_t vbo_constant; bool need_conversion; unsigned vertex_size; unsigned packet_vertex_limit; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c index d71e1d559f5..1ad72f0df64 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -98,6 +98,9 @@ nv50_vertex_state_create(struct pipe_context *pipe, i, util_format_name(ve->src_format)); } so->element[i].state |= i; + so->strides[vbi] = ve->src_stride; + if (!ve->src_stride) + so->vbo_constant |= 1 << vbi; size = util_format_get_blocksize(fmt); if (so->vb_access_size[vbi] < (ve->src_offset + size)) @@ -191,14 +194,14 @@ nv50_user_vbuf_range(struct nv50_context *nv50, unsigned vbi, assert(vbi < PIPE_MAX_ATTRIBS); if (unlikely(nv50->vertex->instance_bufs & (1 << vbi))) { const uint32_t div = nv50->vertex->min_instance_div[vbi]; - *base = nv50->instance_off * nv50->vtxbuf[vbi].stride; - *size = (nv50->instance_max / div) * nv50->vtxbuf[vbi].stride + + *base = nv50->instance_off * nv50->vertex->strides[vbi]; + *size = (nv50->instance_max / div) * nv50->vertex->strides[vbi] + nv50->vertex->vb_access_size[vbi]; } else { /* NOTE: if there are user buffers, we *must* have index bounds */ assert(nv50->vb_elt_limit != ~0); - *base = nv50->vb_elt_first * nv50->vtxbuf[vbi].stride; - *size = nv50->vb_elt_limit * nv50->vtxbuf[vbi].stride + + *base = nv50->vb_elt_first * nv50->vertex->strides[vbi]; + *size = nv50->vb_elt_limit * nv50->vertex->strides[vbi] + nv50->vertex->vb_access_size[vbi]; } } @@ -215,7 +218,7 @@ nv50_upload_user_buffers(struct nv50_context *nv50, const struct pipe_vertex_buffer *vb = &nv50->vtxbuf[b]; 
uint32_t base, size; - if (!(nv50->vbo_user & (1 << b)) || !vb->stride) + if (!(nv50->vbo_user & (1 << b)) || !nv50->vertex->strides[b]) continue; nv50_user_vbuf_range(nv50, b, &base, &size); @@ -249,7 +252,7 @@ nv50_update_user_vbufs(struct nv50_context *nv50) if (!(nv50->vbo_user & (1 << b))) continue; - if (!vb->stride) { + if (!ve->src_stride) { nv50_emit_vtxattr(nv50, vb, ve, i); continue; } @@ -340,7 +343,7 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50) ve = &vertex->element[i]; vb = &nv50->vtxbuf[b]; - if (likely(vb->stride) || !(nv50->vbo_user & (1 << b))) + if (likely(vertex->strides[b]) || !(nv50->vbo_user & (1 << b))) PUSH_DATA(push, ve->state); else PUSH_DATA(push, ve->state | NV50_3D_VERTEX_ARRAY_ATTRIB_CONST); @@ -396,13 +399,13 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50) if (unlikely(ve->pipe.instance_divisor)) { BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 4); - PUSH_DATA (push, NV50_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride); + PUSH_DATA (push, NV50_3D_VERTEX_ARRAY_FETCH_ENABLE | vertex->strides[b]); PUSH_DATAh(push, address); PUSH_DATA (push, address); PUSH_DATA (push, ve->pipe.instance_divisor); } else { BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 3); - PUSH_DATA (push, NV50_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride); + PUSH_DATA (push, NV50_3D_VERTEX_ARRAY_FETCH_ENABLE | vertex->strides[b]); PUSH_DATAh(push, address); PUSH_DATA (push, address); } @@ -800,6 +803,8 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info, nv50->vbo_push_hint = /* the 64 is heuristic */ !(info->index_size && ((nv50->vb_elt_limit + 64) < draws[0].count)); + if (nv50->dirty_3d & (NV50_NEW_3D_ARRAYS | NV50_NEW_3D_VERTEX)) + nv50->vbo_constant = nv50->vertex->vbo_constant & nv50->vbo_user; if (nv50->vbo_user && !(nv50->dirty_3d & (NV50_NEW_3D_ARRAYS | NV50_NEW_3D_VERTEX))) { if (!!nv50->vbo_fifo != nv50->vbo_push_hint) nv50->dirty_3d |= NV50_NEW_3D_ARRAYS; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index 446adb48552..c8f97fa8705 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -1071,14 +1071,9 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe, if (vb[i].is_user_buffer) { nvc0->vbo_user |= 1 << dst_index; - if (!vb[i].stride && nvc0->screen->eng3d->oclass < GM107_3D_CLASS) - nvc0->constant_vbos |= 1 << dst_index; - else - nvc0->constant_vbos &= ~(1 << dst_index); nvc0->vtxbufs_coherent &= ~(1 << dst_index); } else { nvc0->vbo_user &= ~(1 << dst_index); - nvc0->constant_vbos &= ~(1 << dst_index); if (vb[i].buffer.resource && vb[i].buffer.resource->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h index e20032845cd..41b9212d9ee 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h @@ -51,10 +51,12 @@ struct nvc0_vertex_element { struct nvc0_vertex_stateobj { uint32_t min_instance_div[PIPE_MAX_ATTRIBS]; uint16_t vb_access_size[PIPE_MAX_ATTRIBS]; + uint16_t strides[PIPE_MAX_ATTRIBS]; struct translate *translate; unsigned num_elements; uint32_t instance_elts; uint32_t instance_bufs; + uint32_t constant_vbos; bool shared_slots; bool need_conversion; /* e.g. 
VFETCH cannot convert f64 to f32 */ unsigned size; /* size of vertex in bytes (when packed) */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c index 1d6b1ead57c..4e123eba0cb 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c @@ -51,6 +51,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe, unsigned num_elements, const struct pipe_vertex_element *elements) { + struct nvc0_context *nvc0 = nvc0_context(pipe); struct nvc0_vertex_stateobj *so; struct translate_key transkey; unsigned i; @@ -114,6 +115,10 @@ nvc0_vertex_state_create(struct pipe_context *pipe, so->min_instance_div[vbi] = ve->instance_divisor; } + so->strides[vbi] = ve->src_stride; + if (!ve->src_stride && nvc0->screen->eng3d->oclass < GM107_3D_CLASS) + so->constant_vbos |= 1 << vbi; + if (1) { unsigned ca; unsigned j = transkey.nr_elements++; @@ -205,14 +210,14 @@ nvc0_user_vbuf_range(struct nvc0_context *nvc0, int vbi, { if (unlikely(nvc0->vertex->instance_bufs & (1 << vbi))) { const uint32_t div = nvc0->vertex->min_instance_div[vbi]; - *base = nvc0->instance_off * nvc0->vtxbuf[vbi].stride; - *size = (nvc0->instance_max / div) * nvc0->vtxbuf[vbi].stride + + *base = nvc0->instance_off * nvc0->vertex->strides[vbi]; + *size = (nvc0->instance_max / div) * nvc0->vertex->strides[vbi] + nvc0->vertex->vb_access_size[vbi]; } else { /* NOTE: if there are user buffers, we *must* have index bounds */ assert(nvc0->vb_elt_limit != ~0); - *base = nvc0->vb_elt_first * nvc0->vtxbuf[vbi].stride; - *size = nvc0->vb_elt_limit * nvc0->vtxbuf[vbi].stride + + *base = nvc0->vb_elt_first * nvc0->vertex->strides[vbi]; + *size = nvc0->vb_elt_limit * nvc0->vertex->strides[vbi] + nvc0->vertex->vb_access_size[vbi]; } } @@ -340,7 +345,7 @@ nvc0_validate_vertex_buffers(struct nvc0_context *nvc0) PUSH_DATA (push, ve->pipe.instance_divisor); } BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 1); - PUSH_DATA (push, (1 << 12) | 
vb->stride); + PUSH_DATA (push, (1 << 12) | vertex->strides[b]); } /* address/value set in nvc0_update_user_vbufs */ continue; @@ -351,13 +356,13 @@ nvc0_validate_vertex_buffers(struct nvc0_context *nvc0) if (unlikely(ve->pipe.instance_divisor)) { BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 4); - PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride); + PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vertex->strides[b]); PUSH_DATAh(push, res->address + offset); PUSH_DATA (push, res->address + offset); PUSH_DATA (push, ve->pipe.instance_divisor); } else { BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 3); - PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride); + PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vertex->strides[b]); PUSH_DATAh(push, res->address + offset); PUSH_DATA (push, res->address + offset); } @@ -394,7 +399,7 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0) if (mask & (1 << b)) { if (!(nvc0->constant_vbos & (1 << b))) { BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 1); - PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride); + PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | nvc0->vertex->strides[b]); } /* address/value set in nvc0_update_user_vbufs_shared */ continue; @@ -408,7 +413,7 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0) limit = buf->base.width0 - 1; BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 3); - PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride); + PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | nvc0->vertex->strides[b]); PUSH_DATAh(push, buf->address + offset); PUSH_DATA (push, buf->address + offset); @@ -959,6 +964,8 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info, (!indirect || indirect->count_from_stream_output) && info->index_size && (nvc0->vb_elt_limit >= (draws[0].count * 2)); + if (nvc0->dirty_3d & (NVC0_NEW_3D_ARRAYS | NVC0_NEW_3D_VERTEX)) + nvc0->constant_vbos = 
nvc0->vertex->constant_vbos & nvc0->vbo_user; /* Check whether we want to switch vertex-submission mode. */ if (nvc0->vbo_user && !(nvc0->dirty_3d & (NVC0_NEW_3D_ARRAYS | NVC0_NEW_3D_VERTEX))) { if (nvc0->vbo_push_hint != !!nvc0->state.vbo_mode) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c index a0af5781abb..e7e18ca1d81 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c @@ -81,9 +81,9 @@ nvc0_vertex_configure_translate(struct nvc0_context *nvc0, int32_t index_bias) } if (index_bias && !unlikely(nvc0->vertex->instance_bufs & (1 << i))) - map += (intptr_t)index_bias * vb->stride; + map += (intptr_t)index_bias * nvc0->vertex->strides[i]; - translate->set_buffer(translate, i, map, vb->stride, ~0); + translate->set_buffer(translate, i, map, nvc0->vertex->strides[i], ~0); } } @@ -109,7 +109,7 @@ nvc0_push_map_edgeflag(struct push_context *ctx, struct nvc0_context *nvc0, struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index]; struct nv04_resource *buf = nv04_resource(vb->buffer.resource); - ctx->edgeflag.stride = vb->stride; + ctx->edgeflag.stride = ve->src_stride; ctx->edgeflag.width = util_format_get_blocksize(ve->src_format); if (!vb->is_user_buffer) { unsigned offset = vb->buffer_offset + ve->src_offset; @@ -120,7 +120,7 @@ nvc0_push_map_edgeflag(struct push_context *ctx, struct nvc0_context *nvc0, } if (index_bias) - ctx->edgeflag.data += (intptr_t)index_bias * vb->stride; + ctx->edgeflag.data += (intptr_t)index_bias * ve->src_stride; } static inline unsigned diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index c9e31766f4b..7abbbb73c39 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -109,6 +109,7 @@ struct panfrost_sampler_view { struct panfrost_vertex_state { unsigned 
num_elements; struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS]; + uint16_t strides[PIPE_MAX_ATTRIBS]; #if PAN_ARCH >= 9 /* Packed attribute descriptor. All fields are set at CSO create time @@ -955,10 +956,9 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch) for (unsigned i = 0; i < vtx->num_elements; ++i) { struct mali_attribute_packed packed; - unsigned vbi = vtx->pipe[i].vertex_buffer_index; pan_pack(&packed, ATTRIBUTE, cfg) { - cfg.stride = ctx->vertex_buffers[vbi].stride; + cfg.stride = vtx->pipe[i].src_stride; } pan_merge(packed, vtx->attributes[i], ATTRIBUTE); @@ -2054,7 +2054,7 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch, mali_ptr *buffers) /* When there is a divisor, the hardware-level divisor is * the product of the instance divisor and the padded count */ - unsigned stride = buf->stride; + unsigned stride = so->strides[vbi]; unsigned hw_divisor = ctx->padded_count * divisor; if (ctx->instance_count <= 1) { @@ -2172,15 +2172,15 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch, mali_ptr *buffers) /* Base instance offset */ if (ctx->base_instance && so->pipe[i].instance_divisor) { - src_offset += - (ctx->base_instance * buf->stride) / so->pipe[i].instance_divisor; + src_offset += (ctx->base_instance * so->pipe[i].src_stride) / + so->pipe[i].instance_divisor; } /* Also, somewhat obscurely per-instance data needs to be * offset in response to a delayed start in an indexed draw */ if (so->pipe[i].instance_divisor && ctx->instance_count > 1) - src_offset -= buf->stride * ctx->offset_start; + src_offset -= so->pipe[i].src_stride * ctx->offset_start; pan_pack(out + i, ATTRIBUTE, cfg) { cfg.buffer_index = attrib_to_buffer[so->element_buffer[i]]; @@ -3981,6 +3981,8 @@ panfrost_create_vertex_elements_state(struct pipe_context *pctx, so->num_elements = num_elements; memcpy(so->pipe, elements, sizeof(*elements) * num_elements); + for (unsigned i = 0; i < num_elements; ++i) + so->strides[elements[i].vertex_buffer_index] = 
elements[i].src_stride; #if PAN_ARCH >= 9 for (unsigned i = 0; i < num_elements; ++i) panfrost_pack_attribute(dev, elements[i], &so->attributes[i]); diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 3ac7f9eb63c..ad0e4c0eb3d 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -953,18 +953,18 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset, size1 = hw_format_size[i]; size2 = hw_format_size[i+1]; - OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) | - R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride)); - OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); - OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride); + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(velem[i].src_stride) | + R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(velem[i+1].src_stride)); + OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * velem[i].src_stride); + OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * velem[i+1].src_stride); } if (vertex_array_count & 1) { vb1 = &vbuf[velem[i].vertex_buffer_index]; size1 = hw_format_size[i]; - OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride)); - OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride); + OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(velem[i].src_stride)); + OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * velem[i].src_stride); } for (i = 0; i < vertex_array_count; i++) { @@ -982,18 +982,18 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset, if (velem[i].instance_divisor) { stride1 = 0; offset1 = vb1->buffer_offset + velem[i].src_offset + - (instance_id / velem[i].instance_divisor) * vb1->stride; + (instance_id / velem[i].instance_divisor) * velem[i].src_stride; } else { - stride1 = vb1->stride; - offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride; 
+ stride1 = velem[i].src_stride; + offset1 = vb1->buffer_offset + velem[i].src_offset + offset * velem[i].src_stride; } if (velem[i+1].instance_divisor) { stride2 = 0; offset2 = vb2->buffer_offset + velem[i+1].src_offset + - (instance_id / velem[i+1].instance_divisor) * vb2->stride; + (instance_id / velem[i+1].instance_divisor) * velem[i+1].src_stride; } else { - stride2 = vb2->stride; - offset2 = vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride; + stride2 = velem[i+1].src_stride; + offset2 = vb2->buffer_offset + velem[i+1].src_offset + offset * velem[i+1].src_stride; } OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1) | @@ -1009,10 +1009,10 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset, if (velem[i].instance_divisor) { stride1 = 0; offset1 = vb1->buffer_offset + velem[i].src_offset + - (instance_id / velem[i].instance_divisor) * vb1->stride; + (instance_id / velem[i].instance_divisor) * velem[i].src_stride; } else { - stride1 = vb1->stride; - offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride; + stride1 = velem[i].src_stride; + offset1 = vb1->buffer_offset + velem[i].src_offset + offset * velem[i].src_stride; } OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1)); diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index c1866333796..59dc0bc4c18 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -163,7 +163,7 @@ static void r300_split_index_bias(struct r300_context *r300, int index_bias, max_neg_bias = INT_MAX; for (i = 0; i < r300->velems->count; i++) { vb = &vbufs[velem[i].vertex_buffer_index]; - size = (vb->buffer_offset + velem[i].src_offset) / vb->stride; + size = (vb->buffer_offset + velem[i].src_offset) / velem[i].src_stride; max_neg_bias = MIN2(max_neg_bias, size); } @@ -372,7 +372,7 @@ static void r300_draw_arrays_immediate(struct r300_context *r300, size[i] = 
r300->velems->format_size[i] / 4; vbi = velem->vertex_buffer_index; vbuf = &r300->vertex_buffer[vbi]; - stride[i] = vbuf->stride / 4; + stride[i] = velem->src_stride / 4; /* Map the buffer. */ if (!map[vbi]) { @@ -752,7 +752,7 @@ static unsigned r300_max_vertex_count(struct r300_context *r300) /* We're not interested in constant and per-instance attribs. */ if (!vb->buffer.resource || - !vb->stride || + !velems[i].src_stride || velems[i].instance_divisor) { continue; } @@ -774,7 +774,7 @@ static unsigned r300_max_vertex_count(struct r300_context *r300) size -= value; /* Compute the max count. */ - max_count = 1 + size / vb->stride; + max_count = 1 + size / velems[i].src_stride; result = MIN2(result, max_count); } return result; diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 20daa0f4da8..b3c16fe067f 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -166,7 +166,6 @@ static void evergreen_cs_set_vertex_buffer(struct r600_context *rctx, { struct r600_vertexbuf_state *state = &rctx->cs_vertex_buffer_state; struct pipe_vertex_buffer *vb = &state->vb[vb_index]; - vb->stride = 1; vb->buffer_offset = offset; vb->buffer.resource = buffer; vb->is_user_buffer = false; diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index e51ffb6000d..517e5d53db2 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -2139,6 +2139,9 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx, struct r600_resource *rbuffer; uint64_t va; unsigned buffer_index = u_bit_scan(&dirty_mask); + struct r600_fetch_shader *shader = (struct r600_fetch_shader*)&rctx->vertex_fetch_shader; + unsigned stride = pkt_flags == RADEON_CP_PACKET3_COMPUTE_MODE ? 
+ 1 : shader->strides[buffer_index]; vb = &state->vb[buffer_index]; rbuffer = (struct r600_resource*)vb->buffer.resource; @@ -2153,7 +2156,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx, radeon_emit(cs, rbuffer->b.b.width0 - vb->buffer_offset - 1); /* RESOURCEi_WORD1 */ radeon_emit(cs, /* RESOURCEi_WORD2 */ S_030008_ENDIAN_SWAP(r600_endian_swap(32)) | - S_030008_STRIDE(vb->stride) | + S_030008_STRIDE(stride) | S_030008_BASE_ADDRESS_HI(va >> 32UL)); radeon_emit(cs, /* RESOURCEi_WORD3 */ S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) | diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 84a7661c435..0a40b84da98 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -2815,6 +2815,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx, uint32_t *bytecode; int i, j, r, fs_size; struct r600_fetch_shader *shader; + unsigned strides[PIPE_MAX_ATTRIBS]; assert(count < 32); @@ -2862,6 +2863,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx, } } } + strides[elements[i].vertex_buffer_index] = elements[i].src_stride; } for (i = 0; i < count; i++) { @@ -2926,6 +2928,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx, r600_bytecode_clear(&bc); return NULL; } + memcpy(shader->strides, strides, sizeof(strides)); u_suballocator_alloc(&rctx->allocator_fetch_shader, fs_size, 256, &shader->offset, diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 57b1d2bd827..04aab66127d 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -434,6 +434,7 @@ struct r600_cso_state struct r600_fetch_shader { struct r600_resource *buffer; unsigned offset; + unsigned strides[PIPE_MAX_ATTRIBS]; }; struct r600_shader_state { diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c index dfd5cd28ac3..f1f33827720 100644 --- 
a/src/gallium/drivers/r600/r600_pipe_common.c +++ b/src/gallium/drivers/r600/r600_pipe_common.c @@ -204,7 +204,6 @@ void r600_draw_rectangle(struct blitter_context *blitter, /* draw */ struct pipe_vertex_buffer vbuffer = {}; vbuffer.buffer.resource = buf; - vbuffer.stride = 2 * 4 * sizeof(float); /* vertex size */ vbuffer.buffer_offset = offset; rctx->b.set_vertex_buffers(&rctx->b, 1, 0, false, &vbuffer); diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 7d7382b94e1..6e686debdbe 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1670,6 +1670,8 @@ static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom struct r600_resource *rbuffer; unsigned offset; unsigned buffer_index = u_bit_scan(&dirty_mask); + struct r600_fetch_shader *shader = (struct r600_fetch_shader*)&rctx->vertex_fetch_shader; + unsigned stride = shader->strides[buffer_index]; vb = &rctx->vertex_buffer_state.vb[buffer_index]; rbuffer = (struct r600_resource*)vb->buffer.resource; @@ -1684,7 +1686,7 @@ static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom radeon_emit(cs, rbuffer->b.b.width0 - offset - 1); /* RESOURCEi_WORD1 */ radeon_emit(cs, /* RESOURCEi_WORD2 */ S_038008_ENDIAN_SWAP(r600_endian_swap(32)) | - S_038008_STRIDE(vb->stride)); + S_038008_STRIDE(stride)); radeon_emit(cs, 0); /* RESOURCEi_WORD3 */ radeon_emit(cs, 0); /* RESOURCEi_WORD4 */ radeon_emit(cs, 0); /* RESOURCEi_WORD5 */ diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 13a5dd3ebe2..bad1e484917 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -585,11 +585,9 @@ static void r600_set_vertex_buffers(struct pipe_context *ctx, if (input) { for (i = 0; i < count; i++) { if (likely((input[i].buffer.resource != vb[i].buffer.resource) || - (vb[i].stride != input[i].stride) || 
(vb[i].buffer_offset != input[i].buffer_offset) || (vb[i].is_user_buffer != input[i].is_user_buffer))) { if (input[i].buffer.resource) { - vb[i].stride = input[i].stride; vb[i].buffer_offset = input[i].buffer_offset; if (take_ownership) { pipe_resource_reference(&vb[i].buffer.resource, NULL); diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index f21128e2d72..c71e67b8485 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -4946,6 +4946,7 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, unsigned count, v->format_size[i] = desc->block.bits / 8; v->src_offset[i] = elements[i].src_offset; v->vertex_buffer_index[i] = vbo_index; + v->src_stride[i] = elements[i].src_stride; bool always_fix = false; union si_vs_fix_fetch fix_fetch; @@ -5049,7 +5050,7 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, unsigned count, (sscreen->info.gfx_level == GFX6 || sscreen->info.gfx_level >= GFX10); bool opencode = sscreen->options.vs_fetch_always_opencode; - if (check_alignment && (elements[i].src_offset & ((1 << log_hw_load_size) - 1)) != 0) + if (check_alignment && ((elements[i].src_offset & ((1 << log_hw_load_size) - 1)) != 0 || elements[i].src_stride & 3)) opencode = true; if (always_fix || check_alignment || opencode) @@ -5180,7 +5181,7 @@ static void si_set_vertex_buffers(struct pipe_context *ctx, unsigned count, /* Only unreference bound vertex buffers. 
(take_ownership) */ pipe_resource_reference(&dst->buffer.resource, NULL); - if (src->buffer_offset & 3 || src->stride & 3) + if (src->buffer_offset & 3) unaligned |= slot_bit; if (buf) { @@ -5200,9 +5201,8 @@ static void si_set_vertex_buffers(struct pipe_context *ctx, unsigned count, pipe_resource_reference(&dst->buffer.resource, buf); dst->buffer_offset = src->buffer_offset; - dst->stride = src->stride; - if (dst->buffer_offset & 3 || dst->stride & 3) + if (dst->buffer_offset & 3) unaligned |= slot_bit; if (buf) { @@ -5263,12 +5263,12 @@ si_create_vertex_state(struct pipe_screen *screen, assert(!state->velems.instance_divisor_is_one); assert(!state->velems.instance_divisor_is_fetched); assert(!state->velems.fix_fetch_always); - assert(buffer->stride % 4 == 0); assert(buffer->buffer_offset % 4 == 0); assert(!buffer->is_user_buffer); for (unsigned i = 0; i < num_elements; i++) { assert(elements[i].src_offset % 4 == 0); assert(!elements[i].dual_slot); + assert(elements[i].src_stride % 4 == 0); } for (unsigned i = 0; i < num_elements; i++) { diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 8bbc8a806c8..3b8284c8e69 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -126,6 +126,7 @@ struct si_vertex_elements { struct si_resource *instance_divisor_factor_buffer; uint32_t rsrc_word3[SI_MAX_ATTRIBS]; uint16_t src_offset[SI_MAX_ATTRIBS]; + uint16_t src_stride[SI_MAX_ATTRIBS]; uint8_t fix_fetch[SI_MAX_ATTRIBS]; uint8_t format_size[SI_MAX_ATTRIBS]; uint8_t vertex_buffer_index[SI_MAX_ATTRIBS]; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index e5f595cda5b..a11d9cf2f25 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -1994,11 +1994,12 @@ static void ALWAYS_INLINE si_set_vb_descriptor(struct si_vertex_elements *velems } uint64_t va = 
buf->gpu_address + offset; + unsigned stride = velems->src_stride[index]; int64_t num_records = (int64_t)buf->b.b.width0 - offset; - if (GFX_VERSION != GFX8 && vb->stride) { + if (GFX_VERSION != GFX8 && stride) { /* Round up by rounding down and adding 1 */ - num_records = (num_records - velems->format_size[index]) / vb->stride + 1; + num_records = (num_records - velems->format_size[index]) / stride + 1; } assert(num_records >= 0 && num_records <= UINT_MAX); @@ -2009,11 +2010,11 @@ static void ALWAYS_INLINE si_set_vb_descriptor(struct si_vertex_elements *velems * - 3: offset >= NUM_RECORDS (Raw) */ if (GFX_VERSION >= GFX10) - rsrc_word3 |= S_008F0C_OOB_SELECT(vb->stride ? V_008F0C_OOB_SELECT_STRUCTURED - : V_008F0C_OOB_SELECT_RAW); + rsrc_word3 |= S_008F0C_OOB_SELECT(stride ? V_008F0C_OOB_SELECT_STRUCTURED + : V_008F0C_OOB_SELECT_RAW); desc[0] = va; - desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(vb->stride); + desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride); desc[2] = num_records; desc[3] = rsrc_word3; } diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 78ab64bfdf3..af5cf848841 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -2062,7 +2062,7 @@ void si_vs_key_update_inputs(struct si_context *sctx) unsigned vbidx = elts->vertex_buffer_index[i]; struct pipe_vertex_buffer *vb = &sctx->vertex_buffer[vbidx]; unsigned align_mask = (1 << log_hw_load_size) - 1; - if (vb->buffer_offset & align_mask || vb->stride & align_mask) { + if (vb->buffer_offset & align_mask) { fix |= 1 << i; opencode |= 1 << i; } diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h index f058866b7d7..93e4155ecaa 100644 --- a/src/gallium/drivers/svga/svga_context.h +++ b/src/gallium/drivers/svga/svga_context.h @@ -262,6 +262,7 @@ struct svga_velems_state { unsigned count; struct 
pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; SVGA3dDeclType decl_type[PIPE_MAX_ATTRIBS]; /**< vertex attrib formats */ + uint16_t strides[PIPE_MAX_ATTRIBS]; /** Bitmasks indicating which attributes need format conversion */ unsigned adjust_attrib_range; /**< range adjustment */ diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c index 00c8a8cf38d..2e11601c667 100644 --- a/src/gallium/drivers/svga/svga_draw.c +++ b/src/gallium/drivers/svga/svga_draw.c @@ -702,7 +702,7 @@ validate_vertex_buffers(struct svga_hwtnl *hwtnl, /* Set IA slot0 input buffer to the SO buffer */ assert(vbuf_count == 1); - vbuffer_attrs[0].stride = hwtnl->cmd.vbufs[0].stride; + vbuffer_attrs[0].stride = svga->curr.velems->strides[0]; vbuffer_attrs[0].offset = hwtnl->cmd.vbufs[0].buffer_offset; vbuffer_attrs[0].sid = 0; assert(so_vertex_count->buffer != NULL); @@ -717,7 +717,7 @@ validate_vertex_buffers(struct svga_hwtnl *hwtnl, struct svga_buffer *sbuf = svga_buffer(hwtnl->cmd.vbufs[i].buffer.resource); - vbuffer_attrs[i].stride = hwtnl->cmd.vbufs[i].stride; + vbuffer_attrs[i].stride = svga->curr.velems->strides[i]; vbuffer_attrs[i].offset = hwtnl->cmd.vbufs[i].buffer_offset; vbuffer_attrs[i].sid = 0; diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c index 2002f39437c..abfd303c516 100644 --- a/src/gallium/drivers/svga/svga_pipe_vertex.c +++ b/src/gallium/drivers/svga/svga_pipe_vertex.c @@ -261,6 +261,8 @@ svga_create_vertex_elements_state(struct pipe_context *pipe, else { translate_vertex_decls(svga, velems); } + for (unsigned i = 0; i < count; i++) + velems->strides[attribs[i].vertex_buffer_index] = attribs[i].src_stride; } svga->hud.num_vertexelement_objects++; diff --git a/src/gallium/drivers/svga/svga_state_vdecl.c b/src/gallium/drivers/svga/svga_state_vdecl.c index 8a139685cc8..bc8870e338b 100644 --- a/src/gallium/drivers/svga/svga_state_vdecl.c +++ b/src/gallium/drivers/svga/svga_state_vdecl.c @@ 
-79,8 +79,8 @@ emit_hw_vs_vdecl(struct svga_context *svga, uint64_t dirty) buffer = svga_buffer(vb->buffer.resource); if (buffer->uploaded.start > offset) { tmp_neg_bias = buffer->uploaded.start - offset; - if (vb->stride) - tmp_neg_bias = (tmp_neg_bias + vb->stride - 1) / vb->stride; + if (ve->src_stride) + tmp_neg_bias = (tmp_neg_bias + ve->src_stride - 1) / ve->src_stride; neg_bias = MAX2(neg_bias, tmp_neg_bias); } } @@ -103,14 +103,14 @@ emit_hw_vs_vdecl(struct svga_context *svga, uint64_t dirty) decls[i].identity.method = SVGA3D_DECLMETHOD_DEFAULT; decls[i].identity.usage = usage; decls[i].identity.usageIndex = index; - decls[i].array.stride = vb->stride; + decls[i].array.stride = ve->src_stride; /* Compensate for partially uploaded vbo, and * for the negative index bias. */ decls[i].array.offset = (vb->buffer_offset + ve[i].src_offset - + neg_bias * vb->stride + + neg_bias * ve->src_stride - buffer->uploaded.start); assert(decls[i].array.offset >= 0); diff --git a/src/gallium/drivers/svga/svga_swtnl_backend.c b/src/gallium/drivers/svga/svga_swtnl_backend.c index 7f5f1be650d..838d4c42f7b 100644 --- a/src/gallium/drivers/svga/svga_swtnl_backend.c +++ b/src/gallium/drivers/svga/svga_swtnl_backend.c @@ -252,7 +252,6 @@ svga_vbuf_submit_state(struct svga_vbuf_render *svga_render) vb.is_user_buffer = false; vb.buffer.resource = svga_render->vbuf; vb.buffer_offset = svga_render->vdecl_offset; - vb.stride = vdecl[0].array.stride; svga_hwtnl_vertex_buffers(svga->hwtnl, 1, &vb); } diff --git a/src/gallium/drivers/v3d/v3dx_draw.c b/src/gallium/drivers/v3d/v3dx_draw.c index 17442500ea9..5f93421e83a 100644 --- a/src/gallium/drivers/v3d/v3dx_draw.c +++ b/src/gallium/drivers/v3d/v3dx_draw.c @@ -743,7 +743,7 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, &vtx->attrs[i * size], attr) { - attr.stride = vb->stride; + attr.stride = elem->src_stride; attr.address = cl_address(rsc->bo, vb->buffer_offset 
+ elem->src_offset); diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index f00e8d0a8f7..79af1495847 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -192,7 +192,7 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4, /* not vc4->dirty tracked: vc4->last_index_bias */ uint32_t offset = (vb->buffer_offset + elem->src_offset + - vb->stride * (index_bias + + elem->src_stride * (index_bias + extra_index_bias)); uint32_t vb_size = rsc->bo->size - offset; uint32_t elem_size = @@ -201,16 +201,16 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4, cl_emit(&job->shader_rec, ATTRIBUTE_RECORD, attr) { attr.address = cl_address(rsc->bo, offset); attr.number_of_bytes_minus_1 = elem_size - 1; - attr.stride = vb->stride; + attr.stride = elem->src_stride; attr.coordinate_shader_vpm_offset = vc4->prog.cs->vattr_offsets[i]; attr.vertex_shader_vpm_offset = vc4->prog.vs->vattr_offsets[i]; } - if (vb->stride > 0) { + if (elem->src_stride > 0) { max_index = MIN2(max_index, - (vb_size - elem_size) / vb->stride); + (vb_size - elem_size) / elem->src_stride); } } diff --git a/src/gallium/drivers/virgl/virgl_context.c b/src/gallium/drivers/virgl/virgl_context.c index 14748239e60..0720561fe96 100644 --- a/src/gallium/drivers/virgl/virgl_context.c +++ b/src/gallium/drivers/virgl/virgl_context.c @@ -531,6 +531,8 @@ static void *virgl_create_vertex_elements_state(struct pipe_context *ctx, break; } } + for (int i = 0; i < num_elements; ++i) + state->strides[elements[i].vertex_buffer_index] = elements[i].src_stride; state->handle = virgl_object_assign_handle(); virgl_encoder_create_vertex_elements(vctx, state->handle, @@ -589,7 +591,7 @@ static void virgl_set_vertex_buffers(struct pipe_context *ctx, static void virgl_hw_set_vertex_buffers(struct virgl_context *vctx) { if (vctx->vertex_array_dirty) { - struct virgl_vertex_elements_state *ve = vctx->vertex_elements; + const struct virgl_vertex_elements_state *ve = 
vctx->vertex_elements; if (ve->num_bindings) { struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; diff --git a/src/gallium/drivers/virgl/virgl_context.h b/src/gallium/drivers/virgl/virgl_context.h index d65657dfd7c..3ccc9c29eee 100644 --- a/src/gallium/drivers/virgl/virgl_context.h +++ b/src/gallium/drivers/virgl/virgl_context.h @@ -107,6 +107,7 @@ struct virgl_vertex_elements_state { uint32_t handle; uint8_t binding_map[PIPE_MAX_ATTRIBS]; uint8_t num_bindings; + uint16_t strides[PIPE_MAX_ATTRIBS]; }; static inline struct virgl_sampler_view * diff --git a/src/gallium/drivers/virgl/virgl_encode.c b/src/gallium/drivers/virgl/virgl_encode.c index 199a52e0d53..10429f61451 100644 --- a/src/gallium/drivers/virgl/virgl_encode.c +++ b/src/gallium/drivers/virgl/virgl_encode.c @@ -760,7 +760,7 @@ int virgl_encoder_set_vertex_buffers(struct virgl_context *ctx, virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_SET_VERTEX_BUFFERS, 0, VIRGL_SET_VERTEX_BUFFERS_SIZE(num_buffers))); for (i = 0; i < num_buffers; i++) { struct virgl_resource *res = virgl_resource(buffers[i].buffer.resource); - virgl_encoder_write_dword(ctx->cbuf, buffers[i].stride); + virgl_encoder_write_dword(ctx->cbuf, ctx->vertex_elements ? 
ctx->vertex_elements->strides[i] : 0); virgl_encoder_write_dword(ctx->cbuf, buffers[i].buffer_offset); virgl_encoder_write_res(ctx, res); } diff --git a/src/gallium/drivers/virgl/virgl_encode.h b/src/gallium/drivers/virgl/virgl_encode.h index 94595de3aa6..a27951a3b4b 100644 --- a/src/gallium/drivers/virgl/virgl_encode.h +++ b/src/gallium/drivers/virgl/virgl_encode.h @@ -38,6 +38,7 @@ struct virgl_transfer; struct virgl_sampler_view; struct virgl_video_codec; struct virgl_video_buffer; +struct virgl_vertex_elements_state; struct virgl_surface { struct pipe_surface base; diff --git a/src/gallium/drivers/zink/zink_context.c b/src/gallium/drivers/zink/zink_context.c index 0d49c169634..74da7e7ed7c 100644 --- a/src/gallium/drivers/zink/zink_context.c +++ b/src/gallium/drivers/zink/zink_context.c @@ -1284,13 +1284,11 @@ zink_set_vertex_buffers(struct pipe_context *pctx, uint32_t enabled_buffers = ctx->gfx_pipeline_state.vertex_buffers_enabled_mask; enabled_buffers |= u_bit_consecutive(0, num_buffers); enabled_buffers &= ~u_bit_consecutive(num_buffers, unbind_num_trailing_slots); - bool stride_changed = false; if (buffers) { for (unsigned i = 0; i < num_buffers; ++i) { const struct pipe_vertex_buffer *vb = buffers + i; struct pipe_vertex_buffer *ctx_vb = &ctx->vertex_buffers[i]; - stride_changed |= ctx_vb->stride != vb->stride; update_existing_vbo(ctx, i); if (!take_ownership) pipe_resource_reference(&ctx_vb->buffer.resource, vb->buffer.resource); @@ -1305,7 +1303,6 @@ zink_set_vertex_buffers(struct pipe_context *pctx, res->gfx_barrier |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; res->barrier_access[0] |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; update_res_bind_count(ctx, res, false, false); - ctx_vb->stride = vb->stride; ctx_vb->buffer_offset = vb->buffer_offset; /* always barrier before possible rebind */ zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, @@ -1328,7 +1325,7 @@ zink_set_vertex_buffers(struct pipe_context *pctx, } if 
(need_state_change) ctx->vertex_state_changed = true; - else if (!have_input_state && (stride_changed || ctx->gfx_pipeline_state.vertex_buffers_enabled_mask != enabled_buffers)) + else if (!have_input_state && ctx->gfx_pipeline_state.vertex_buffers_enabled_mask != enabled_buffers) ctx->vertex_state_changed = true; ctx->gfx_pipeline_state.vertex_buffers_enabled_mask = enabled_buffers; ctx->vertex_buffers_dirty = num_buffers > 0; diff --git a/src/gallium/drivers/zink/zink_draw.cpp b/src/gallium/drivers/zink/zink_draw.cpp index b8d5ef134b1..f687bb250e8 100644 --- a/src/gallium/drivers/zink/zink_draw.cpp +++ b/src/gallium/drivers/zink/zink_draw.cpp @@ -132,7 +132,7 @@ bind_vertex_buffers_dgc(struct zink_context *ctx) assert(res->obj->bda); ptr->bufferAddress = res->obj->bda + vb->buffer_offset; ptr->size = res->base.b.width0; - ptr->stride = vb->stride; + ptr->stride = ctx->element_state->hw_state.b.strides[ctx->element_state->hw_state.binding_map[i]]; } else { ptr->bufferAddress = 0; ptr->size = 0; @@ -147,7 +147,6 @@ zink_bind_vertex_buffers(struct zink_batch *batch, struct zink_context *ctx) { VkBuffer buffers[PIPE_MAX_ATTRIBS]; VkDeviceSize buffer_offsets[PIPE_MAX_ATTRIBS]; - VkDeviceSize buffer_strides[PIPE_MAX_ATTRIBS]; struct zink_vertex_elements_state *elems = ctx->element_state; struct zink_screen *screen = zink_screen(ctx->base.screen); @@ -159,15 +158,9 @@ zink_bind_vertex_buffers(struct zink_batch *batch, struct zink_context *ctx) assert(res->obj->buffer); buffers[i] = res->obj->buffer; buffer_offsets[i] = vb->buffer_offset; - buffer_strides[i] = vb->stride; - if (DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT2 || DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT) - elems->hw_state.dynbindings[i].stride = vb->stride; } else { buffers[i] = zink_resource(ctx->dummy_vertex_buffer)->obj->buffer; buffer_offsets[i] = 0; - buffer_strides[i] = 0; - if (DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT2 || DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT) - 
elems->hw_state.dynbindings[i].stride = 0; } } @@ -177,7 +170,7 @@ zink_bind_vertex_buffers(struct zink_batch *batch, struct zink_context *ctx) if (elems->hw_state.num_bindings) VKCTX(CmdBindVertexBuffers2EXT)(batch->state->cmdbuf, 0, elems->hw_state.num_bindings, - buffers, buffer_offsets, NULL, buffer_strides); + buffers, buffer_offsets, NULL, (VkDeviceSize*)elems->hw_state.b.strides); } else if (elems->hw_state.num_bindings) VKSCR(CmdBindVertexBuffers)(batch->state->cmdbuf, 0, elems->hw_state.num_bindings, diff --git a/src/gallium/drivers/zink/zink_program_state.hpp b/src/gallium/drivers/zink/zink_program_state.hpp index c74362a03ad..0c6c85a95a4 100644 --- a/src/gallium/drivers/zink/zink_program_state.hpp +++ b/src/gallium/drivers/zink/zink_program_state.hpp @@ -84,7 +84,7 @@ check_vertex_strides(struct zink_context *ctx) const struct zink_vertex_elements_state *ves = ctx->element_state; for (unsigned i = 0; i < ves->hw_state.num_bindings; i++) { const struct pipe_vertex_buffer *vb = ctx->vertex_buffers + ves->hw_state.binding_map[i]; - unsigned stride = vb->buffer.resource ? vb->stride : 0; + unsigned stride = vb->buffer.resource ? ves->hw_state.b.strides[ves->hw_state.binding_map[i]] : 0; if (stride && stride < ves->min_stride[i]) return false; } @@ -147,7 +147,7 @@ zink_get_gfx_pipeline(struct zink_context *ctx, for (unsigned i = 0; i < state->element_state->num_bindings; i++) { const unsigned buffer_id = ctx->element_state->hw_state.binding_map[i]; struct pipe_vertex_buffer *vb = ctx->vertex_buffers + buffer_id; - state->vertex_strides[buffer_id] = vb->buffer.resource ? vb->stride : 0; + state->vertex_strides[buffer_id] = vb->buffer.resource ? 
state->element_state->b.strides[buffer_id] : 0; hash = XXH32(&state->vertex_strides[buffer_id], sizeof(uint32_t), hash); } state->vertex_hash = hash ^ state->element_state->hash; diff --git a/src/gallium/drivers/zink/zink_state.c b/src/gallium/drivers/zink/zink_state.c index a4934fb1173..df989a296eb 100644 --- a/src/gallium/drivers/zink/zink_state.c +++ b/src/gallium/drivers/zink/zink_state.c @@ -57,6 +57,7 @@ zink_create_vertex_elements_state(struct pipe_context *pctx, uint32_t size8 = 0; uint32_t size16 = 0; uint32_t size32 = 0; + uint16_t strides[PIPE_MAX_ATTRIBS]; for (i = 0; i < num_elements; ++i) { const struct pipe_vertex_element *elem = elements + i; @@ -112,12 +113,14 @@ zink_create_vertex_elements_state(struct pipe_context *pctx, ves->hw_state.dynattribs[i].binding = binding; ves->hw_state.dynattribs[i].location = i; ves->hw_state.dynattribs[i].format = format; + strides[binding] = elem->src_stride; assert(ves->hw_state.dynattribs[i].format != VK_FORMAT_UNDEFINED); ves->hw_state.dynattribs[i].offset = elem->src_offset; } else { ves->hw_state.attribs[i].binding = binding; ves->hw_state.attribs[i].location = i; ves->hw_state.attribs[i].format = format; + ves->hw_state.b.strides[elem->vertex_buffer_index] = elem->src_stride; assert(ves->hw_state.attribs[i].format != VK_FORMAT_UNDEFINED); ves->hw_state.attribs[i].offset = elem->src_offset; ves->min_stride[binding] = MAX2(ves->min_stride[binding], elem->src_offset + vk_format_get_blocksize(format)); @@ -154,6 +157,7 @@ zink_create_vertex_elements_state(struct pipe_context *pctx, ves->hw_state.dynbindings[i].sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT; ves->hw_state.dynbindings[i].binding = ves->bindings[i].binding; ves->hw_state.dynbindings[i].inputRate = ves->bindings[i].inputRate; + ves->hw_state.dynbindings[i].stride = strides[i]; if (ves->divisor[i]) ves->hw_state.dynbindings[i].divisor = ves->divisor[i]; else @@ -803,10 +807,6 @@ zink_create_vertex_state(struct pipe_screen *pscreen, 
struct zink_context ctx; ctx.base.screen = pscreen; struct zink_vertex_elements_state *elems = zink_create_vertex_elements_state(&ctx.base, num_elements, elements); - for (unsigned i = 0; i < elems->hw_state.num_bindings; i++) { - if (zink_screen(pscreen)->info.have_EXT_vertex_input_dynamic_state) - elems->hw_state.dynbindings[i].stride = buffer->stride; - } zstate->velems = *elems; zink_delete_vertex_elements_state(&ctx.base, elems); diff --git a/src/gallium/drivers/zink/zink_types.h b/src/gallium/drivers/zink/zink_types.h index d3dfecefd0d..99c91ac1613 100644 --- a/src/gallium/drivers/zink/zink_types.h +++ b/src/gallium/drivers/zink/zink_types.h @@ -301,6 +301,7 @@ struct zink_vertex_elements_hw_state { struct { VkVertexInputBindingDivisorDescriptionEXT divisors[PIPE_MAX_ATTRIBS]; VkVertexInputBindingDescription bindings[PIPE_MAX_ATTRIBS]; // combination of element_state and stride + unsigned strides[PIPE_MAX_ATTRIBS]; uint8_t divisors_present; } b; VkVertexInputBindingDescription2EXT dynbindings[PIPE_MAX_ATTRIBS]; diff --git a/src/gallium/frontends/d3d10umd/Draw.cpp b/src/gallium/frontends/d3d10umd/Draw.cpp index 1b7db2eb788..f3fecb63703 100644 --- a/src/gallium/frontends/d3d10umd/Draw.cpp +++ b/src/gallium/frontends/d3d10umd/Draw.cpp @@ -52,12 +52,15 @@ ClampedUAdd(unsigned a, } +/* stride is required in order to set the element data */ static void update_velems(Device *pDevice) { if (!pDevice->velems_changed) return; + for (unsigned i = 0; i < pDevice->velems.count; i++) + pDevice->element_layout->velems.velems[i].src_stride = pDevice->vertex_strides[pDevice->element_layout->velems.velems[i].vertex_buffer_index]; cso_set_vertex_elements(pDevice->cso, &pDevice->element_layout->velems); pDevice->velems_changed = false; diff --git a/src/gallium/frontends/d3d10umd/InputAssembly.cpp b/src/gallium/frontends/d3d10umd/InputAssembly.cpp index dd4ebfcae80..f9d191c3a19 100644 --- a/src/gallium/frontends/d3d10umd/InputAssembly.cpp +++ 
b/src/gallium/frontends/d3d10umd/InputAssembly.cpp @@ -145,7 +145,7 @@ IaSetVertexBuffers(D3D10DDI_HDEVICE hDevice, } if (resource) { - vb->stride = pStrides[i]; + pDevice->vertex_strides[StartBuffer + i] = pStrides[i]; vb->buffer_offset = pOffsets[i]; if (vb->is_user_buffer) { vb->buffer.resource = NULL; @@ -154,7 +154,7 @@ IaSetVertexBuffers(D3D10DDI_HDEVICE hDevice, pipe_resource_reference(&vb->buffer.resource, resource); } else { - vb->stride = 0; + pDevice->vertex_strides[StartBuffer + i] = 0; vb->buffer_offset = 0; if (!vb->is_user_buffer) { pipe_resource_reference(&vb->buffer.resource, NULL); @@ -169,7 +169,7 @@ IaSetVertexBuffers(D3D10DDI_HDEVICE hDevice, /* XXX this is odd... */ if (!vb->is_user_buffer && !vb->buffer.resource) { - vb->stride = 0; + pDevice->vertex_strides[i] = 0; vb->buffer_offset = 0; vb->is_user_buffer = true; vb->buffer.user = dummy; diff --git a/src/gallium/frontends/d3d10umd/State.h b/src/gallium/frontends/d3d10umd/State.h index 753046e229e..46fde9c5c78 100644 --- a/src/gallium/frontends/d3d10umd/State.h +++ b/src/gallium/frontends/d3d10umd/State.h @@ -70,6 +70,7 @@ struct Device struct cso_context *cso; struct pipe_framebuffer_state fb; struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; + unsigned vertex_strides[PIPE_MAX_ATTRIBS]; struct pipe_resource *index_buffer; unsigned restart_index; unsigned index_size; diff --git a/src/gallium/frontends/lavapipe/lvp_execute.c b/src/gallium/frontends/lavapipe/lvp_execute.c index 4bba8c34ec2..cc1b84f394a 100644 --- a/src/gallium/frontends/lavapipe/lvp_execute.c +++ b/src/gallium/frontends/lavapipe/lvp_execute.c @@ -146,6 +146,8 @@ struct rendering_state { int num_const_bufs[LVP_SHADER_STAGES]; int num_vb; unsigned start_vb; + bool vb_strides_dirty; + unsigned vb_strides[PIPE_MAX_ATTRIBS]; struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS]; size_t vb_sizes[PIPE_MAX_ATTRIBS]; //UINT32_MAX for unset uint8_t vertex_buffer_index[PIPE_MAX_ATTRIBS]; /* temp storage to sort for start_vb */ 
@@ -489,6 +491,12 @@ static void emit_state(struct rendering_state *state) state->stencil_ref_dirty = false; } + if (state->vb_strides_dirty) { + for (unsigned i = 0; i < state->velem.count; i++) + state->velem.velems[i].src_stride = state->vb_strides[state->velem.velems[i].vertex_buffer_index]; + state->vb_strides_dirty = false; + } + if (state->vb_dirty) { cso_set_vertex_buffers(state->cso, state->num_vb, 0, false, state->vb); state->vb_dirty = false; @@ -961,8 +969,10 @@ static void handle_graphics_pipeline(struct lvp_pipeline *pipeline, if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_VI_BINDING_STRIDES)) { if (ps->vi) { - u_foreach_bit(b, ps->vi->bindings_valid) - state->vb[b].stride = ps->vi->bindings[b].stride; + u_foreach_bit(a, ps->vi->attributes_valid) { + uint32_t b = ps->vi->attributes[a].binding; + state->velem.velems[a].src_stride = ps->vi->bindings[b].stride; + } state->vb_dirty = true; } } @@ -1098,8 +1108,10 @@ static void handle_vertex_buffers2(struct vk_cmd_queue_entry *cmd, state->vb_sizes[idx] = UINT32_MAX; } - if (vcb->strides) - state->vb[idx].stride = vcb->strides[i]; + if (vcb->strides) { + state->vb_strides[idx] = vcb->strides[i]; + state->vb_strides_dirty = true; + } } if (vcb->first_binding < state->start_vb) state->start_vb = vcb->first_binding; @@ -3307,7 +3319,7 @@ static void handle_set_vertex_input(struct vk_cmd_queue_entry *cmd, state->velem.velems[location].src_offset = attrs[i].offset; state->vertex_buffer_index[location] = attrs[i].binding; state->velem.velems[location].src_format = lvp_vk_format_to_pipe_format(attrs[i].format); - state->vb[attrs[i].binding].stride = binding->stride; + state->velem.velems[location].src_stride = binding->stride; uint32_t d = binding->divisor; switch (binding->inputRate) { case VK_VERTEX_INPUT_RATE_VERTEX: diff --git a/src/gallium/frontends/nine/buffer9.c b/src/gallium/frontends/nine/buffer9.c index d5931943acb..2720ba8740a 100644 --- a/src/gallium/frontends/nine/buffer9.c +++ 
b/src/gallium/frontends/nine/buffer9.c @@ -232,7 +232,7 @@ NineBuffer9_RebindIfRequired( struct NineBuffer9 *This, nine_context_set_stream_source_apply(device, i, resource, device->state.vtxbuf[i].buffer_offset + offset, - device->state.vtxbuf[i].stride); + device->state.vtxstride[i]); } if (device->state.idxbuf == (struct NineIndexBuffer9 *)This) nine_context_set_indices_apply(device, resource, diff --git a/src/gallium/frontends/nine/device9.c b/src/gallium/frontends/nine/device9.c index a54beab8111..16c9a8bc234 100644 --- a/src/gallium/frontends/nine/device9.c +++ b/src/gallium/frontends/nine/device9.c @@ -3045,7 +3045,7 @@ NineDevice9_DrawPrimitive( struct NineDevice9 *This, /* Tracking for dynamic SYSTEMMEM */ for (i = 0; i < This->caps.MaxStreams; i++) { - unsigned stride = This->state.vtxbuf[i].stride; + unsigned stride = This->state.vtxstride[i]; if (IS_SYSTEMMEM_DYNAMIC((struct NineBuffer9*)This->state.stream[i])) { unsigned start = This->state.vtxbuf[i].buffer_offset + StartVertex * stride; unsigned full_size = This->state.stream[i]->base.size; @@ -3092,7 +3092,7 @@ NineDevice9_DrawIndexedPrimitive( struct NineDevice9 *This, for (i = 0; i < This->caps.MaxStreams; i++) { if (IS_SYSTEMMEM_DYNAMIC((struct NineBuffer9*)This->state.stream[i])) { - uint32_t stride = This->state.vtxbuf[i].stride; + uint32_t stride = This->state.vtxstride[i]; uint32_t full_size = This->state.stream[i]->base.size; uint32_t start, stop; @@ -3198,7 +3198,6 @@ NineDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This, base = MinVertexIndex * VertexStreamZeroStride; vbuf.is_user_buffer = false; vbuf.buffer.resource = NULL; - vbuf.stride = VertexStreamZeroStride; u_upload_data(This->vertex_uploader, base, NumVertices * VertexStreamZeroStride, /* XXX */ @@ -3225,6 +3224,7 @@ NineDevice9_DrawIndexedPrimitiveUP( struct NineDevice9 *This, MinVertexIndex, NumVertices, PrimitiveCount, + VertexStreamZeroStride, &vbuf, ibuf, ibuf ? 
NULL : (void*)pIndexData, @@ -3758,17 +3758,17 @@ NineDevice9_SetStreamSource( struct NineDevice9 *This, if (unlikely(This->is_recording)) { nine_bind(&state->stream[i], pStreamData); state->changed.vtxbuf |= 1 << StreamNumber; - state->vtxbuf[i].stride = Stride; + state->vtxstride[i] = Stride; state->vtxbuf[i].buffer_offset = OffsetInBytes; return D3D_OK; } if (state->stream[i] == NineVertexBuffer9(pStreamData) && - state->vtxbuf[i].stride == Stride && + state->vtxstride[i] == Stride && state->vtxbuf[i].buffer_offset == OffsetInBytes) return D3D_OK; - state->vtxbuf[i].stride = Stride; + state->vtxstride[i] = Stride; state->vtxbuf[i].buffer_offset = OffsetInBytes; NineBindBufferToDevice(This, @@ -3791,7 +3791,7 @@ NineDevice9_SetStreamSourceNULL( struct NineDevice9 *This ) DBG("This=%p\n", This); - state->vtxbuf[0].stride = 0; + state->vtxstride[0] = 0; state->vtxbuf[0].buffer_offset = 0; if (!state->stream[0]) @@ -3816,7 +3816,7 @@ NineDevice9_GetStreamSource( struct NineDevice9 *This, user_assert(ppStreamData && pOffsetInBytes && pStride, D3DERR_INVALIDCALL); nine_reference_set(ppStreamData, state->stream[i]); - *pStride = state->vtxbuf[i].stride; + *pStride = state->vtxstride[i]; *pOffsetInBytes = state->vtxbuf[i].buffer_offset; return D3D_OK; diff --git a/src/gallium/frontends/nine/nine_csmt_helper.h b/src/gallium/frontends/nine/nine_csmt_helper.h index 30d80706cce..a6998ad9662 100644 --- a/src/gallium/frontends/nine/nine_csmt_helper.h +++ b/src/gallium/frontends/nine/nine_csmt_helper.h @@ -30,9 +30,9 @@ _31,_32,_33,_34,_35,_36,_37,_38,_39,_40, \ _41,_42,_43,_44,_45,_46,_47,_48,_49,_50, \ _51,_52,_53,_54,_55,_56,_57,_58,_59,_60, \ - _61,_62,_63,N,...) N + _61,_62,_63,_64,_65,_66,_67,_68,_69,_70,N,...) N #define __RSEQ_N() \ - 63,62,61,60, \ + 70,69,68,67,66,65,64,63,62,61,60, \ 59,58,57,56,55,54,53,52,51,50, \ 49,48,47,46,45,44,43,42,41,40, \ 39,38,37,36,35,34,33,32,31,30, \ @@ -52,6 +52,7 @@ #define _args_for_bypass_56(a, b, c, d, e, f, g, ...) 
,g _args_for_bypass_49(__VA_ARGS__) #define _args_for_bypass_63(a, b, c, d, e, f, g, ...) ,g _args_for_bypass_56(__VA_ARGS__) #define _args_for_bypass_70(a, b, c, d, e, f, g, ...) ,g _args_for_bypass_63(__VA_ARGS__) +#define _args_for_bypass_77(a, b, c, d, e, f, g, ...) ,g _args_for_bypass_70(__VA_ARGS__) #define _GFUNC_(n) _args_for_bypass_##n #define _GFUNC(n) _GFUNC_(n) @@ -69,6 +70,7 @@ #define _args_for_mem_56(a, b, c, d, e, f, g, ...) f; _args_for_mem_49(__VA_ARGS__) #define _args_for_mem_63(a, b, c, d, e, f, g, ...) f; _args_for_mem_56(__VA_ARGS__) #define _args_for_mem_70(a, b, c, d, e, f, g, ...) f; _args_for_mem_63(__VA_ARGS__) +#define _args_for_mem_77(a, b, c, d, e, f, g, ...) f; _args_for_mem_70(__VA_ARGS__) #define _FFUNC_(n) _args_for_mem_##n #define _FFUNC(n) _FFUNC_(n) @@ -86,6 +88,7 @@ #define _args_for_unbind_56(a, b, c, d, e, f, g, ...) e; _args_for_unbind_49(__VA_ARGS__) #define _args_for_unbind_63(a, b, c, d, e, f, g, ...) e; _args_for_unbind_56(__VA_ARGS__) #define _args_for_unbind_70(a, b, c, d, e, f, g, ...) e; _args_for_unbind_63(__VA_ARGS__) +#define _args_for_unbind_77(a, b, c, d, e, f, g, ...) e; _args_for_unbind_70(__VA_ARGS__) #define _EFUNC_(n) _args_for_unbind_##n #define _EFUNC(n) _EFUNC_(n) @@ -103,6 +106,7 @@ #define _args_for_call_56(a, b, c, d, e, f, g, ...) ,d _args_for_call_49(__VA_ARGS__) #define _args_for_call_63(a, b, c, d, e, f, g, ...) ,d _args_for_call_56(__VA_ARGS__) #define _args_for_call_70(a, b, c, d, e, f, g, ...) ,d _args_for_call_63(__VA_ARGS__) +#define _args_for_call_77(a, b, c, d, e, f, g, ...) ,d _args_for_call_70(__VA_ARGS__) #define _DFUNC_(n) _args_for_call_##n #define _DFUNC(n) _DFUNC_(n) @@ -120,6 +124,7 @@ #define _args_for_decl_56(a, b, c, d, e, f, g, ...) ,c _args_for_decl_49(__VA_ARGS__) #define _args_for_decl_63(a, b, c, d, e, f, g, ...) ,c _args_for_decl_56(__VA_ARGS__) #define _args_for_decl_70(a, b, c, d, e, f, g, ...) 
,c _args_for_decl_63(__VA_ARGS__) +#define _args_for_decl_77(a, b, c, d, e, f, g, ...) ,c _args_for_decl_70(__VA_ARGS__) #define _CFUNC_(n) _args_for_decl_##n #define _CFUNC(n) _CFUNC_(n) @@ -137,6 +142,7 @@ #define _args_for_assign_56(a, b, c, d, e, f, g, ...) b; _args_for_assign_49(__VA_ARGS__) #define _args_for_assign_63(a, b, c, d, e, f, g, ...) b; _args_for_assign_56(__VA_ARGS__) #define _args_for_assign_70(a, b, c, d, e, f, g, ...) b; _args_for_assign_63(__VA_ARGS__) +#define _args_for_assign_77(a, b, c, d, e, f, g, ...) b; _args_for_assign_70(__VA_ARGS__) #define _BFUNC_(n) _args_for_assign_##n #define _BFUNC(n) _BFUNC_(n) @@ -154,6 +160,7 @@ #define _args_for_struct_56(a, b, c, d, e, f, g, ...) a; _args_for_struct_49(__VA_ARGS__) #define _args_for_struct_63(a, b, c, d, e, f, g, ...) a; _args_for_struct_56(__VA_ARGS__) #define _args_for_struct_70(a, b, c, d, e, f, g, ...) a; _args_for_struct_63(__VA_ARGS__) +#define _args_for_struct_77(a, b, c, d, e, f, g, ...) a; _args_for_struct_70(__VA_ARGS__) #define _AFUNC_(n) _args_for_struct_##n #define _AFUNC(n) _AFUNC_(n) diff --git a/src/gallium/frontends/nine/nine_state.c b/src/gallium/frontends/nine/nine_state.c index cb9e5a53c38..fb7caba7900 100644 --- a/src/gallium/frontends/nine/nine_state.c +++ b/src/gallium/frontends/nine/nine_state.c @@ -921,6 +921,7 @@ update_vertex_elements(struct NineDevice9 *device) ve.velems[n].vertex_buffer_index = vtxbuf_holes_map[ve.velems[n].vertex_buffer_index]; b = ve.velems[n].vertex_buffer_index; + ve.velems[n].src_stride = context->vtxstride[b]; context->stream_usage_mask |= 1 << b; /* XXX wine just uses 1 here: */ if (context->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA) @@ -933,6 +934,7 @@ update_vertex_elements(struct NineDevice9 *device) ve.velems[n].vertex_buffer_index = vtxbuf_holes_map[dummy_vbo_stream]; ve.velems[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; ve.velems[n].src_offset = 0; + ve.velems[n].src_stride = 0; ve.velems[n].instance_divisor = 0; 
ve.velems[n].dual_slot = false; } @@ -969,7 +971,6 @@ update_vertex_buffers(struct NineDevice9 *device) vtxbuf_i = u_bit_scan(&mask); if (vtxbuf_i == context->dummy_vbo_bound_at) { vbuffer[i].buffer.resource = device->dummy_vbo; - vbuffer[i].stride = 0; vbuffer[i].is_user_buffer = false; vbuffer[i].buffer_offset = 0; } else { @@ -1615,10 +1616,13 @@ CSMT_ITEM_NO_WAIT(nine_context_set_stream_source_apply, * but not for *Up draws */ if (context->vtxbuf[i].buffer.resource == res && context->vtxbuf[i].buffer_offset == OffsetInBytes && - context->vtxbuf[i].stride == Stride) + context->vtxstride[i] == Stride) return; - context->vtxbuf[i].stride = Stride; + if (context->vtxstride[i] != Stride) { + context->vtxstride[i] = Stride; + context->changed.group |= NINE_STATE_VDECL; + } context->vtxbuf[i].buffer_offset = OffsetInBytes; pipe_resource_reference(&context->vtxbuf[i].buffer.resource, res); @@ -2145,7 +2149,7 @@ nine_context_apply_stateblock(struct NineDevice9 *device, uint32_t m = src->changed.vtxbuf | src->changed.stream_freq; for (i = 0; m; ++i, m >>= 1) { if (src->changed.vtxbuf & (1 << i)) - nine_context_set_stream_source(device, i, src->stream[i], src->vtxbuf[i].buffer_offset, src->vtxbuf[i].stride); + nine_context_set_stream_source(device, i, src->stream[i], src->vtxbuf[i].buffer_offset, src->vtxstride[i]); if (src->changed.stream_freq & (1 << i)) nine_context_set_stream_source_freq(device, i, src->stream_freq[i]); } @@ -2505,6 +2509,7 @@ CSMT_ITEM_NO_WAIT(nine_context_draw_indexed_primitive_from_vtxbuf_idxbuf, ARG_VAL(UINT, MinVertexIndex), ARG_VAL(UINT, NumVertices), ARG_VAL(UINT, PrimitiveCount), + ARG_VAL(UINT, vbuf_stride), ARG_BIND_VBUF(struct pipe_vertex_buffer, vbuf), ARG_BIND_RES(struct pipe_resource, ibuf), ARG_VAL(void *, user_ibuf), @@ -2518,6 +2523,14 @@ CSMT_ITEM_NO_WAIT(nine_context_draw_indexed_primitive_from_vtxbuf_idxbuf, if (context->vs && context->vs->swvp_only && !context->swvp) return; + if (context->vtxstride[0] != vbuf_stride) { + 
context->vtxstride[0] = vbuf_stride; + /* force elements update for stride. + * We don't need to restore the old value, + * as the caller sets it to 0 after the call */ + context->changed.group |= NINE_STATE_VDECL; + } + nine_update_state(device); init_draw_info(&info, &draw, device, PrimitiveType, PrimitiveCount); @@ -3220,7 +3233,6 @@ update_vertex_buffers_sw(struct NineDevice9 *device, int dummy_vbo_stream, if (dummy_vbo_stream == i) { vbuffer[j].buffer.resource = NULL; pipe_resource_reference(&vbuffer[j].buffer.resource, device->dummy_vbo_sw); - vbuffer[j].stride = 0; vbuffer[j].is_user_buffer = false; vbuffer[j].buffer_offset = 0; j++; @@ -3234,10 +3246,10 @@ update_vertex_buffers_sw(struct NineDevice9 *device, int dummy_vbo_stream, buf = NineVertexBuffer9_GetResource(state->stream[i], &offset); DBG("Locking %p (offset %d, length %d)\n", buf, - vbuffer[j].buffer_offset, num_vertices * vbuffer[j].stride); + vbuffer[j].buffer_offset, num_vertices * state->vtxstride[i]); u_box_1d(vbuffer[j].buffer_offset + offset + start_vertice * - vbuffer[j].stride, num_vertices * vbuffer[j].stride, &box); + state->vtxstride[i], num_vertices * state->vtxstride[i], &box); userbuf = pipe->buffer_map(pipe, buf, 0, PIPE_MAP_READ, &box, &(sw_internal->transfers_so[i])); diff --git a/src/gallium/frontends/nine/nine_state.h b/src/gallium/frontends/nine/nine_state.h index d32e3e98a5b..73240715570 100644 --- a/src/gallium/frontends/nine/nine_state.h +++ b/src/gallium/frontends/nine/nine_state.h @@ -218,6 +218,7 @@ struct nine_state uint32_t stream_mask; /* i bit set for *stream[i] not NULL */ struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; /* vtxbuf.buffer unused */ unsigned last_vtxbuf_count; + uint16_t vtxstride[PIPE_MAX_ATTRIBS]; UINT stream_freq[PIPE_MAX_ATTRIBS]; struct pipe_clip_state clip; @@ -287,6 +288,7 @@ struct nine_context { struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; uint32_t vtxbuf_mask; /* i bit set for context->vtxbuf[i].buffer.resource not NULL */ uint32_t 
last_vtxbuf_count; + uint16_t vtxstride[PIPE_MAX_ATTRIBS]; UINT stream_freq[PIPE_MAX_ATTRIBS]; uint32_t stream_instancedata_mask; /* derived from stream_freq */ uint32_t stream_usage_mask; /* derived from VS and vdecl */ @@ -549,11 +551,12 @@ nine_context_draw_indexed_primitive_from_vtxbuf_idxbuf(struct NineDevice9 *devic UINT MinVertexIndex, UINT NumVertices, UINT PrimitiveCount, + unsigned vbuf_stride, struct pipe_vertex_buffer *vbuf, struct pipe_resource *ibuf, void *user_ibuf, unsigned index_offset, - unsigned index_size); + unsigned index_size); void nine_context_resource_copy_region(struct NineDevice9 *device, diff --git a/src/gallium/frontends/nine/stateblock9.c b/src/gallium/frontends/nine/stateblock9.c index 32dc5244e20..f6fbf4f2b2b 100644 --- a/src/gallium/frontends/nine/stateblock9.c +++ b/src/gallium/frontends/nine/stateblock9.c @@ -283,7 +283,7 @@ nine_state_copy_common(struct NineDevice9 *device, (struct NineBuffer9 *)src->stream[i]); if (src->stream[i]) { dst->vtxbuf[i].buffer_offset = src->vtxbuf[i].buffer_offset; - dst->vtxbuf[i].stride = src->vtxbuf[i].stride; + dst->vtxstride[i] = src->vtxstride[i]; } } if (mask->changed.stream_freq & (1 << i)) @@ -463,7 +463,7 @@ nine_state_copy_common_all(struct NineDevice9 *device, (struct NineBuffer9 *)src->stream[i]); if (src->stream[i]) { dst->vtxbuf[i].buffer_offset = src->vtxbuf[i].buffer_offset; - dst->vtxbuf[i].stride = src->vtxbuf[i].stride; + dst->vtxstride[i] = src->vtxstride[i]; } dst->stream_freq[i] = src->stream_freq[i]; } diff --git a/src/gallium/frontends/xa/xa_renderer.c b/src/gallium/frontends/xa/xa_renderer.c index 99ddd32c51a..a638d35b54e 100644 --- a/src/gallium/frontends/xa/xa_renderer.c +++ b/src/gallium/frontends/xa/xa_renderer.c @@ -91,6 +91,8 @@ renderer_draw(struct xa_context *r) struct cso_velems_state velems; velems.count = r->attrs_per_vertex; memcpy(velems.velems, r->velems, sizeof(r->velems[0]) * velems.count); + for (unsigned i = 0; i < velems.count; i++) + 
velems.velems[i].src_stride = velems.count * 4 * sizeof(float); cso_set_vertex_elements(r->cso, &velems); util_draw_user_vertex_buffer(r->cso, r->buffer, MESA_PRIM_QUADS, diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index e1953e1af83..cd55ee04afa 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -650,7 +650,6 @@ struct pipe_transfer */ struct pipe_vertex_buffer { - uint16_t stride; /**< stride to same attrib in next vertex, in bytes */ bool is_user_buffer; unsigned buffer_offset; /**< offset to start of data in buffer, in bytes */ @@ -742,6 +741,9 @@ struct pipe_vertex_element */ uint8_t src_format; /* low 8 bits of enum pipe_format. */ + /** stride to same attrib in next vertex, in bytes */ + uint32_t src_stride; /* technically only uint16_t, expanded for struct padding */ + /** Instance data rate divisor. 0 means this is per-vertex data, * n means per-instance data used for n consecutive instances (n > 0). 
*/ diff --git a/src/gallium/tests/trivial/quad-tex.c b/src/gallium/tests/trivial/quad-tex.c index 7ce51a04225..43c5ac4f8a5 100644 --- a/src/gallium/tests/trivial/quad-tex.c +++ b/src/gallium/tests/trivial/quad-tex.c @@ -264,11 +264,13 @@ static void init_prog(struct program *p) p->velem.velems[0].instance_divisor = 0; p->velem.velems[0].vertex_buffer_index = 0; p->velem.velems[0].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + p->velem.velems[0].src_stride = 2 * 4 * sizeof(float); p->velem.velems[1].src_offset = 1 * 4 * sizeof(float); /* offset 16, second element */ p->velem.velems[1].instance_divisor = 0; p->velem.velems[1].vertex_buffer_index = 0; p->velem.velems[1].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + p->velem.velems[1].src_stride = 2 * 4 * sizeof(float); /* vertex shader */ { diff --git a/src/gallium/tests/trivial/tri.c b/src/gallium/tests/trivial/tri.c index f8afc94feb0..a56b2bce9cc 100644 --- a/src/gallium/tests/trivial/tri.c +++ b/src/gallium/tests/trivial/tri.c @@ -208,11 +208,13 @@ static void init_prog(struct program *p) p->velem.velems[0].instance_divisor = 0; p->velem.velems[0].vertex_buffer_index = 0; p->velem.velems[0].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + p->velem.velems[0].src_stride = 2 * 4 * sizeof(float); p->velem.velems[1].src_offset = 1 * 4 * sizeof(float); /* offset 16, second element */ p->velem.velems[1].instance_divisor = 0; p->velem.velems[1].vertex_buffer_index = 0; p->velem.velems[1].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + p->velem.velems[1].src_stride = 2 * 4 * sizeof(float); /* vertex shader */ { diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c index c3150df11a9..bfd4e8fccf4 100644 --- a/src/mesa/main/varray.c +++ b/src/mesa/main/varray.c @@ -278,6 +278,7 @@ _mesa_bind_vertex_buffer(struct gl_context *ctx, if (binding->BufferObj != vbo || binding->Offset != offset || binding->Stride != stride) { + bool stride_changed = binding->Stride != stride; if (take_vbo_ownership) { 
_mesa_reference_buffer_object(ctx, &binding->BufferObj, NULL); @@ -298,8 +299,10 @@ _mesa_bind_vertex_buffer(struct gl_context *ctx, if (vao->Enabled & binding->_BoundArrays) { ctx->NewDriverState |= ST_NEW_VERTEX_ARRAYS; - /* Non-dynamic VAOs merge vertex buffers, which affects vertex elements. */ - if (!vao->IsDynamic) + /* Non-dynamic VAOs merge vertex buffers, which affects vertex elements. + * stride changes also require new vertex elements + */ + if (!vao->IsDynamic || stride_changed) ctx->Array.NewVertexElements = true; } diff --git a/src/mesa/state_tracker/st_atom_array.cpp b/src/mesa/state_tracker/st_atom_array.cpp index 02a7b377573..c5b48a7554d 100644 --- a/src/mesa/state_tracker/st_atom_array.cpp +++ b/src/mesa/state_tracker/st_atom_array.cpp @@ -60,10 +60,12 @@ enum st_update_flag { static void ALWAYS_INLINE init_velement(struct pipe_vertex_element *velements, const struct gl_vertex_format *vformat, - int src_offset, unsigned instance_divisor, + int src_offset, unsigned src_stride, + unsigned instance_divisor, int vbo_index, bool dual_slot, int idx) { velements[idx].src_offset = src_offset; + velements[idx].src_stride = src_stride; velements[idx].src_format = vformat->_PipeFormat; velements[idx].instance_divisor = instance_divisor; velements[idx].vertex_buffer_index = vbo_index; @@ -109,13 +111,13 @@ setup_arrays(struct st_context *st, vbuffer[bufidx].is_user_buffer = true; vbuffer[bufidx].buffer_offset = 0; } - vbuffer[bufidx].stride = binding->Stride; /* in bytes */ if (UPDATE == UPDATE_BUFFERS_ONLY) continue; /* Set the vertex element. 
*/ init_velement(velements->velems, &attrib->Format, 0, + binding->Stride, binding->InstanceDivisor, bufidx, dual_slot_inputs & BITFIELD_BIT(attr), util_bitcount_fast(inputs_read & BITFIELD_MASK(attr))); @@ -143,7 +145,6 @@ setup_arrays(struct st_context *st, vbuffer[bufidx].is_user_buffer = true; vbuffer[bufidx].buffer_offset = 0; } - vbuffer[bufidx].stride = binding->Stride; /* in bytes */ const GLbitfield boundmask = _mesa_draw_bound_attrib_bits(binding); GLbitfield attrmask = mask & boundmask; @@ -162,7 +163,7 @@ setup_arrays(struct st_context *st, = _mesa_draw_array_attrib(vao, attr); const GLuint off = _mesa_draw_attributes_relative_offset(attrib); init_velement(velements->velems, &attrib->Format, off, - binding->InstanceDivisor, bufidx, + binding->Stride, binding->InstanceDivisor, bufidx, dual_slot_inputs & BITFIELD_BIT(attr), util_bitcount_fast(inputs_read & BITFIELD_MASK(attr))); } while (attrmask); @@ -217,7 +218,6 @@ st_setup_current(struct st_context *st, vbuffer[bufidx].is_user_buffer = false; vbuffer[bufidx].buffer.resource = NULL; /* vbuffer[bufidx].buffer_offset is set below */ - vbuffer[bufidx].stride = 0; /* Use const_uploader for zero-stride vertex attributes, because * it may use a better memory placement than stream_uploader. 
@@ -253,7 +253,7 @@ st_setup_current(struct st_context *st, if (UPDATE == UPDATE_ALL) { init_velement(velements->velems, &attrib->Format, cursor - ptr, - 0, bufidx, dual_slot_inputs & BITFIELD_BIT(attr), + 0, 0, bufidx, dual_slot_inputs & BITFIELD_BIT(attr), util_bitcount_fast(inputs_read & BITFIELD_MASK(attr))); } @@ -287,14 +287,13 @@ st_setup_current_user(struct st_context *st, = _mesa_draw_current_attrib(ctx, attr); const unsigned bufidx = (*num_vbuffers)++; - init_velement(velements->velems, &attrib->Format, 0, 0, + init_velement(velements->velems, &attrib->Format, 0, 0, 0, bufidx, dual_slot_inputs & BITFIELD_BIT(attr), util_bitcount(inputs_read & BITFIELD_MASK(attr))); vbuffer[bufidx].is_user_buffer = true; vbuffer[bufidx].buffer.user = attrib->Ptr; vbuffer[bufidx].buffer_offset = 0; - vbuffer[bufidx].stride = 0; } } diff --git a/src/mesa/state_tracker/st_cb_drawtex.c b/src/mesa/state_tracker/st_cb_drawtex.c index 419a0451996..f3e9451cec1 100644 --- a/src/mesa/state_tracker/st_cb_drawtex.c +++ b/src/mesa/state_tracker/st_cb_drawtex.c @@ -269,6 +269,7 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, velems.velems[i].vertex_buffer_index = 0; velems.velems[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; velems.velems[i].dual_slot = false; + velems.velems[i].src_stride = numAttribs * 4 * sizeof(float); } velems.count = numAttribs; diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index a14174ff25c..4f16fe9bcff 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -523,12 +523,15 @@ st_create_context_priv(struct gl_context *ctx, struct pipe_context *pipe, st->util_velems.velems[0].src_offset = 0; st->util_velems.velems[0].vertex_buffer_index = 0; st->util_velems.velems[0].src_format = PIPE_FORMAT_R32G32B32_FLOAT; + st->util_velems.velems[0].src_stride = sizeof(struct st_util_vertex); st->util_velems.velems[1].src_offset = 3 * sizeof(float); 
st->util_velems.velems[1].vertex_buffer_index = 0; st->util_velems.velems[1].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + st->util_velems.velems[1].src_stride = sizeof(struct st_util_vertex); st->util_velems.velems[2].src_offset = 7 * sizeof(float); st->util_velems.velems[2].vertex_buffer_index = 0; st->util_velems.velems[2].src_format = PIPE_FORMAT_R32G32_FLOAT; + st->util_velems.velems[2].src_stride = sizeof(struct st_util_vertex); } ctx->Const.PackedDriverUniformStorage = diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 5307bf71e13..bcda9b7f4c8 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -422,8 +422,6 @@ st_draw_quad(struct st_context *st, struct pipe_vertex_buffer vb = {0}; struct st_util_vertex *verts; - vb.stride = sizeof(struct st_util_vertex); - u_upload_alloc(st->pipe->stream_uploader, 0, 4 * sizeof(struct st_util_vertex), 4, &vb.buffer_offset, &vb.buffer.resource, (void **) &verts); diff --git a/src/mesa/state_tracker/st_pbo.c b/src/mesa/state_tracker/st_pbo.c index d6316af96c5..648ac7ad826 100644 --- a/src/mesa/state_tracker/st_pbo.c +++ b/src/mesa/state_tracker/st_pbo.c @@ -219,8 +219,6 @@ st_pbo_draw(struct st_context *st, const struct st_pbo_addresses *addr, float *verts = NULL; - vbo.stride = 2 * sizeof(float); - u_upload_alloc(st->pipe->stream_uploader, 0, 8 * sizeof(float), 4, &vbo.buffer_offset, &vbo.buffer.resource, (void **) &verts); if (!verts) @@ -239,6 +237,7 @@ st_pbo_draw(struct st_context *st, const struct st_pbo_addresses *addr, velem.count = 1; velem.velems[0].src_offset = 0; + velem.velems[0].src_stride = 2 * sizeof(float); velem.velems[0].instance_divisor = 0; velem.velems[0].vertex_buffer_index = 0; velem.velems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;