diff --git a/src/broadcom/vulkan/v3dv_cl.h b/src/broadcom/vulkan/v3dv_cl.h index 1784f8e7bb9..c0b0e380786 100644 --- a/src/broadcom/vulkan/v3dv_cl.h +++ b/src/broadcom/vulkan/v3dv_cl.h @@ -155,6 +155,23 @@ void v3dv_cl_ensure_space_with_branch(struct v3dv_cl *cl, uint32_t space); _loop_terminate = NULL; \ })) \ +#define cl_emit_with_prepacked(cl, packet, prepacked, name) \ + for (struct cl_packet_struct(packet) name = { \ + cl_packet_header(packet) \ + }, \ + *_loop_terminate = &name; \ + __builtin_expect(_loop_terminate != NULL, 1); \ + ({ \ + struct v3dv_cl_out *cl_out = cl_start(cl); \ + uint8_t packed[cl_packet_length(packet)]; \ + cl_packet_pack(packet)(cl, packed, &name); \ + for (int _i = 0; _i < cl_packet_length(packet); _i++) /* OR in the prepacked bytes */ \ + ((uint8_t *)cl_out)[_i] = packed[_i] | (prepacked)[_i]; \ + cl_advance(&cl_out, cl_packet_length(packet)); \ + cl_end(cl, cl_out); \ + _loop_terminate = NULL; \ + })) \ + /** * Helper function called by the XML-generated pack functions for filling in * an address field in shader records. diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c index fd0984feecf..81131d8d3cf 100644 --- a/src/broadcom/vulkan/v3dv_cmd_buffer.c +++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c @@ -1501,20 +1501,6 @@ emit_viewport(struct v3dv_cmd_buffer *cmd_buffer) } } -/* FIXME: in fact this is not really required at this point, as we don't plan - * to initially support GS, but it is more readable and serves as a - * placeholder, to have the struct and fill it with default values. 
- */ -struct vpm_config { - uint32_t As; - uint32_t Vc; - uint32_t Gs; - uint32_t Gd; - uint32_t Gv; - uint32_t Ve; - uint32_t gs_width; -}; - static void cmd_buffer_emit_graphics_pipeline(struct v3dv_cmd_buffer *cmd_buffer) { @@ -1560,54 +1546,18 @@ cmd_buffer_emit_graphics_pipeline(struct v3dv_cmd_buffer *cmd_buffer) cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD), 32); - struct vpm_config vpm_cfg_bin, vpm_cfg; + cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_RECORD, + pipeline->shader_state_record, shader) { - /* FIXME: values below are default when non-GS is available. Would need to - * provide real values if GS gets supported - */ - vpm_cfg_bin.As = 1; - vpm_cfg_bin.Ve = 0; - vpm_cfg_bin.Vc = pipeline->vs_bin->prog_data.vs->vcm_cache_size; - - vpm_cfg.As = 1; - vpm_cfg.Ve = 0; - vpm_cfg.Vc = pipeline->vs->prog_data.vs->vcm_cache_size; - - cl_emit(&job->indirect, GL_SHADER_STATE_RECORD, shader) { - shader.enable_clipping = true; - - shader.point_size_in_shaded_vertex_data = - pipeline->vs->key.vs.per_vertex_point_size; - - /* Must be set if the shader modifies Z, discards, or modifies - * the sample mask. For any of these cases, the fragment - * shader needs to write the Z value (even just discards). + /* FIXME: we are setting these values here and during the + * prepacking. This is because both cl_emit_with_prepacked and v3dv_pack + * assert for minimum values of these. It would be good to get + * v3dv_pack to assert on the final value if possible */ - shader.fragment_shader_does_z_writes = - pipeline->fs->prog_data.fs->writes_z; - /* Set if the EZ test must be disabled (due to shader side - * effects and the early_z flag not being present in the - * shader). 
- */ - shader.turn_off_early_z_test = - pipeline->fs->prog_data.fs->disable_ez; - - shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 = - pipeline->fs->prog_data.fs->uses_center_w; - - shader.any_shader_reads_hardware_written_primitive_id = false; - - shader.do_scoreboard_wait_on_first_thread_switch = - pipeline->fs->prog_data.fs->lock_scoreboard_on_first_thrsw; - shader.disable_implicit_point_line_varyings = - !pipeline->fs->prog_data.fs->uses_implicit_point_line_varyings; - - shader.number_of_varyings_in_fragment_shader = - pipeline->fs->prog_data.fs->num_inputs; - - shader.coordinate_shader_propagate_nans = true; - shader.vertex_shader_propagate_nans = true; - shader.fragment_shader_propagate_nans = true; + shader.min_coord_shader_input_segments_required_in_play = + pipeline->vpm_cfg_bin.As; + shader.min_vertex_shader_input_segments_required_in_play = + pipeline->vpm_cfg.As; shader.coordinate_shader_code_address = v3dv_cl_address(pipeline->vs_bin->assembly_bo, 0); @@ -1616,63 +1566,10 @@ cmd_buffer_emit_graphics_pipeline(struct v3dv_cmd_buffer *cmd_buffer) shader.fragment_shader_code_address = v3dv_cl_address(pipeline->fs->assembly_bo, 0); - /* FIXME: Use combined input/output size flag in the common case (also - * on v3d, see v3dx_draw). - */ - shader.coordinate_shader_has_separate_input_and_output_vpm_blocks = - pipeline->vs_bin->prog_data.vs->separate_segments; - shader.vertex_shader_has_separate_input_and_output_vpm_blocks = - pipeline->vs->prog_data.vs->separate_segments; - - shader.coordinate_shader_input_vpm_segment_size = - pipeline->vs_bin->prog_data.vs->separate_segments ? - pipeline->vs_bin->prog_data.vs->vpm_input_size : 1; - shader.vertex_shader_input_vpm_segment_size = - pipeline->vs->prog_data.vs->separate_segments ? 
- pipeline->vs->prog_data.vs->vpm_input_size : 1; - - shader.coordinate_shader_output_vpm_segment_size = - pipeline->vs_bin->prog_data.vs->vpm_output_size; - shader.vertex_shader_output_vpm_segment_size = - pipeline->vs->prog_data.vs->vpm_output_size; - shader.coordinate_shader_uniforms_address = vs_bin_uniforms; shader.vertex_shader_uniforms_address = vs_uniforms; shader.fragment_shader_uniforms_address = fs_uniforms; - shader.min_coord_shader_input_segments_required_in_play = - vpm_cfg_bin.As; - shader.min_vertex_shader_input_segments_required_in_play = - vpm_cfg.As; - - shader.min_coord_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size = - vpm_cfg_bin.Ve; - shader.min_vertex_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size = - vpm_cfg.Ve; - - shader.coordinate_shader_4_way_threadable = - pipeline->vs_bin->prog_data.vs->base.threads == 4; - shader.vertex_shader_4_way_threadable = - pipeline->vs->prog_data.vs->base.threads == 4; - shader.fragment_shader_4_way_threadable = - pipeline->fs->prog_data.fs->base.threads == 4; - - shader.coordinate_shader_start_in_final_thread_section = - pipeline->vs_bin->prog_data.vs->base.single_seg; - shader.vertex_shader_start_in_final_thread_section = - pipeline->vs->prog_data.vs->base.single_seg; - shader.fragment_shader_start_in_final_thread_section = - pipeline->fs->prog_data.fs->base.single_seg; - - shader.vertex_id_read_by_coordinate_shader = - pipeline->vs_bin->prog_data.vs->uses_vid; - shader.instance_id_read_by_coordinate_shader = - pipeline->vs_bin->prog_data.vs->uses_iid; - shader.vertex_id_read_by_vertex_shader = - pipeline->vs->prog_data.vs->uses_vid; - shader.instance_id_read_by_vertex_shader = - pipeline->vs->prog_data.vs->uses_iid; - /* FIXME: I understand that the following is needed only if * vtx_num_elements > 0 */ @@ -1701,8 +1598,8 @@ cmd_buffer_emit_graphics_pipeline(struct v3dv_cmd_buffer *cmd_buffer) } cl_emit(&job->bcl, VCM_CACHE_SIZE, vcm) { - 
 vcm.number_of_16_vertex_batches_for_binning = vpm_cfg_bin.Vc; - vcm.number_of_16_vertex_batches_for_rendering = vpm_cfg.Vc; + vcm.number_of_16_vertex_batches_for_binning = pipeline->vpm_cfg_bin.Vc; + vcm.number_of_16_vertex_batches_for_rendering = pipeline->vpm_cfg.Vc; } cl_emit(&job->bcl, GL_SHADER_STATE, state) { diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c index 75b67b4759e..338af12cf33 100644 --- a/src/broadcom/vulkan/v3dv_pipeline.c +++ b/src/broadcom/vulkan/v3dv_pipeline.c @@ -683,6 +683,17 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline, } } + /* FIXME: values below are default when non-GS is available. Would need to + * provide real values if GS gets supported + */ + pipeline->vpm_cfg_bin.As = 1; + pipeline->vpm_cfg_bin.Ve = 0; + pipeline->vpm_cfg_bin.Vc = pipeline->vs_bin->prog_data.vs->vcm_cache_size; + + pipeline->vpm_cfg.As = 1; + pipeline->vpm_cfg.Ve = 0; + pipeline->vpm_cfg.Vc = pipeline->vs->prog_data.vs->vcm_cache_size; + return VK_SUCCESS; } @@ -814,6 +825,122 @@ pack_cfg_bits(struct v3dv_pipeline *pipeline, }; } +static void +pack_shader_state_record(struct v3dv_pipeline *pipeline) +{ + assert(sizeof(pipeline->shader_state_record) == + cl_packet_length(GL_SHADER_STATE_RECORD)); + + /* Note: we are not packing addresses, as we need the job (see + * cl_pack_emit_reloc). Additionally uniforms can't be filled up at this + * point as they depend on dynamic info that can be set after creating the + * pipeline (like viewport). They need to be filled later, so we are + * doing a partial prepacking. + */ + v3dv_pack(pipeline->shader_state_record, GL_SHADER_STATE_RECORD, shader) { + shader.enable_clipping = true; + + shader.point_size_in_shaded_vertex_data = + pipeline->vs->key.vs.per_vertex_point_size; + + /* Must be set if the shader modifies Z, discards, or modifies + * the sample mask. For any of these cases, the fragment + * shader needs to write the Z value (even just discards). 
+ */ + shader.fragment_shader_does_z_writes = + pipeline->fs->prog_data.fs->writes_z; + /* Set if the EZ test must be disabled (due to shader side + * effects and the early_z flag not being present in the + * shader). + */ + shader.turn_off_early_z_test = + pipeline->fs->prog_data.fs->disable_ez; + + shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 = + pipeline->fs->prog_data.fs->uses_center_w; + + shader.any_shader_reads_hardware_written_primitive_id = false; + + shader.do_scoreboard_wait_on_first_thread_switch = + pipeline->fs->prog_data.fs->lock_scoreboard_on_first_thrsw; + shader.disable_implicit_point_line_varyings = + !pipeline->fs->prog_data.fs->uses_implicit_point_line_varyings; + + shader.number_of_varyings_in_fragment_shader = + pipeline->fs->prog_data.fs->num_inputs; + + shader.coordinate_shader_propagate_nans = true; + shader.vertex_shader_propagate_nans = true; + shader.fragment_shader_propagate_nans = true; + + /* Note: see previous note about addresses */ + /* shader.coordinate_shader_code_address */ + /* shader.vertex_shader_code_address */ + /* shader.fragment_shader_code_address */ + + /* FIXME: Use combined input/output size flag in the common case (also + * on v3d, see v3dx_draw). + */ + shader.coordinate_shader_has_separate_input_and_output_vpm_blocks = + pipeline->vs_bin->prog_data.vs->separate_segments; + shader.vertex_shader_has_separate_input_and_output_vpm_blocks = + pipeline->vs->prog_data.vs->separate_segments; + + shader.coordinate_shader_input_vpm_segment_size = + pipeline->vs_bin->prog_data.vs->separate_segments ? + pipeline->vs_bin->prog_data.vs->vpm_input_size : 1; + shader.vertex_shader_input_vpm_segment_size = + pipeline->vs->prog_data.vs->separate_segments ? 
+ pipeline->vs->prog_data.vs->vpm_input_size : 1; + + shader.coordinate_shader_output_vpm_segment_size = + pipeline->vs_bin->prog_data.vs->vpm_output_size; + shader.vertex_shader_output_vpm_segment_size = + pipeline->vs->prog_data.vs->vpm_output_size; + + /* Note: see previous note about addresses */ + /* shader.coordinate_shader_uniforms_address */ + /* shader.vertex_shader_uniforms_address */ + /* shader.fragment_shader_uniforms_address */ + + shader.min_coord_shader_input_segments_required_in_play = + pipeline->vpm_cfg_bin.As; + shader.min_vertex_shader_input_segments_required_in_play = + pipeline->vpm_cfg.As; + + shader.min_coord_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size = + pipeline->vpm_cfg_bin.Ve; + shader.min_vertex_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size = + pipeline->vpm_cfg.Ve; + + shader.coordinate_shader_4_way_threadable = + pipeline->vs_bin->prog_data.vs->base.threads == 4; + shader.vertex_shader_4_way_threadable = + pipeline->vs->prog_data.vs->base.threads == 4; + shader.fragment_shader_4_way_threadable = + pipeline->fs->prog_data.fs->base.threads == 4; + + shader.coordinate_shader_start_in_final_thread_section = + pipeline->vs_bin->prog_data.vs->base.single_seg; + shader.vertex_shader_start_in_final_thread_section = + pipeline->vs->prog_data.vs->base.single_seg; + shader.fragment_shader_start_in_final_thread_section = + pipeline->fs->prog_data.fs->base.single_seg; + + shader.vertex_id_read_by_coordinate_shader = + pipeline->vs_bin->prog_data.vs->uses_vid; + shader.instance_id_read_by_coordinate_shader = + pipeline->vs_bin->prog_data.vs->uses_iid; + shader.vertex_id_read_by_vertex_shader = + pipeline->vs->prog_data.vs->uses_vid; + shader.instance_id_read_by_vertex_shader = + pipeline->vs->prog_data.vs->uses_iid; + + /* Note: see previous note about addresses */ + /* shader.address_of_default_attribute_values */ + } +} + static VkResult pipeline_init(struct v3dv_pipeline *pipeline, struct 
 v3dv_device *device, @@ -856,6 +983,8 @@ pipeline_init(struct v3dv_pipeline *pipeline, return result; } + pack_shader_state_record(pipeline); + return result; } diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h index 8f617f29c6d..3087df210db 100644 --- a/src/broadcom/vulkan/v3dv_private.h +++ b/src/broadcom/vulkan/v3dv_private.h @@ -583,6 +583,20 @@ struct v3dv_pipeline_stage { struct v3dv_bo *assembly_bo; }; +/* FIXME: although the full vpm_config is not required at this point, as we + * don't plan to initially support GS, it is more readable and serves as a + * placeholder, to have the struct and fill it with default values. + */ +struct vpm_config { + uint32_t As; + uint32_t Vc; + uint32_t Gs; + uint32_t Gd; + uint32_t Gv; + uint32_t Ve; + uint32_t gs_width; +}; + struct v3dv_pipeline { struct v3dv_device *device; @@ -599,9 +613,12 @@ struct v3dv_pipeline { struct v3dv_dynamic_state dynamic_state; + struct vpm_config vpm_cfg; + struct vpm_config vpm_cfg_bin; /* Packets prepacked during pipeline creation */ uint8_t cfg_bits[cl_packet_length(CFG_BITS)]; + uint8_t shader_state_record[cl_packet_length(GL_SHADER_STATE_RECORD)]; }; uint32_t v3dv_physical_device_api_version(struct v3dv_physical_device *dev);