diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c index da104f64d7b..ee9eee9d000 100644 --- a/src/broadcom/vulkan/v3dv_cmd_buffer.c +++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c @@ -1727,7 +1727,7 @@ cmd_buffer_update_ez_state(struct v3dv_cmd_buffer *cmd_buffer, } /* If the FS writes Z, then it may update against the chosen EZ direction */ - if (pipeline->fs->prog_data.fs->writes_z) + if (pipeline->fs->current_variant->prog_data.fs->writes_z) job->ez_state = VC5_EZ_DISABLED; if (job->first_ez_state == VC5_EZ_UNDECIDED && @@ -2186,14 +2186,14 @@ emit_varyings_state(struct v3dv_cmd_buffer *cmd_buffer) struct v3dv_job *job = cmd_buffer->state.job; struct v3dv_pipeline *pipeline = cmd_buffer->state.pipeline; + struct v3d_fs_prog_data *prog_data_fs = + pipeline->fs->current_variant->prog_data.fs; + const uint32_t num_flags = - ARRAY_SIZE(pipeline->fs->prog_data.fs->flat_shade_flags); - const uint32_t *flat_shade_flags = - pipeline->fs->prog_data.fs->flat_shade_flags; - const uint32_t *noperspective_flags = - pipeline->fs->prog_data.fs->noperspective_flags; - const uint32_t *centroid_flags = - pipeline->fs->prog_data.fs->centroid_flags; + ARRAY_SIZE(prog_data_fs->flat_shade_flags); + const uint32_t *flat_shade_flags = prog_data_fs->flat_shade_flags; + const uint32_t *noperspective_flags = prog_data_fs->noperspective_flags; + const uint32_t *centroid_flags = prog_data_fs->centroid_flags; if (!emit_varying_flags(job, num_flags, flat_shade_flags, emit_flat_shade_flags)) { @@ -2243,9 +2243,9 @@ emit_gl_shader_state(struct v3dv_cmd_buffer *cmd_buffer) v3dv_write_uniforms(cmd_buffer, pipeline->vs_bin); /* Update the cache dirty flag based on the shader progs data */ - job->tmu_dirty_rcl |= pipeline->vs_bin->prog_data.vs->base.tmu_dirty_rcl; - job->tmu_dirty_rcl |= pipeline->vs->prog_data.vs->base.tmu_dirty_rcl; - job->tmu_dirty_rcl |= pipeline->fs->prog_data.fs->base.tmu_dirty_rcl; + job->tmu_dirty_rcl |= pipeline->vs_bin->current_variant->prog_data.vs->base.tmu_dirty_rcl; + job->tmu_dirty_rcl |= pipeline->vs->current_variant->prog_data.vs->base.tmu_dirty_rcl; + job->tmu_dirty_rcl |= pipeline->fs->current_variant->prog_data.fs->base.tmu_dirty_rcl; /* See GFXH-930 workaround below */ uint32_t num_elements_to_emit = MAX2(pipeline->va_count, 1); @@ -2271,11 +2271,11 @@ emit_gl_shader_state(struct v3dv_cmd_buffer *cmd_buffer) pipeline->vpm_cfg.As; shader.coordinate_shader_code_address = - v3dv_cl_address(pipeline->vs_bin->assembly_bo, 0); + v3dv_cl_address(pipeline->vs_bin->current_variant->assembly_bo, 0); shader.vertex_shader_code_address = - v3dv_cl_address(pipeline->vs->assembly_bo, 0); + v3dv_cl_address(pipeline->vs->current_variant->assembly_bo, 0); shader.fragment_shader_code_address = - v3dv_cl_address(pipeline->fs->assembly_bo, 0); + v3dv_cl_address(pipeline->fs->current_variant->assembly_bo, 0); shader.coordinate_shader_uniforms_address = vs_bin_uniforms; shader.vertex_shader_uniforms_address = vs_uniforms; @@ -2296,6 +2296,12 @@ emit_gl_shader_state(struct v3dv_cmd_buffer *cmd_buffer) struct v3dv_vertex_binding *c_vb = &cmd_buffer->state.vertex_bindings[binding]; + struct v3d_vs_prog_data *prog_data_vs = + pipeline->vs->current_variant->prog_data.vs; + + struct v3d_vs_prog_data *prog_data_vs_bin = + pipeline->vs_bin->current_variant->prog_data.vs; + cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, &pipeline->vertex_attrs[i * packet_length], attr) { @@ -2306,9 +2312,9 @@ emit_gl_shader_state(struct v3dv_cmd_buffer *cmd_buffer) c_vb->offset); attr.number_of_values_read_by_coordinate_shader = - pipeline->vs_bin->prog_data.vs->vattr_sizes[location]; + prog_data_vs_bin->vattr_sizes[location]; attr.number_of_values_read_by_vertex_shader = - pipeline->vs->prog_data.vs->vattr_sizes[location]; + prog_data_vs->vattr_sizes[location]; /* GFXH-930: At least one attribute must be enabled and read by CS * and VS. If we have attributes being consumed by the VS but not @@ -2316,7 +2322,7 @@ emit_gl_shader_state(struct v3dv_cmd_buffer *cmd_buffer) * CS's VPM inputs. (Since CS is just dead-code-elimination compared * to VS, we can't have CS loading but not VS). */ - if (pipeline->vs_bin->prog_data.vs->vattr_sizes[location]) + if (prog_data_vs->vattr_sizes[location]) cs_loaded_any = true; if (binding == pipeline->va_count - 1 && !cs_loaded_any) { diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c index 4227e1e2b73..270b5a9ad80 100644 --- a/src/broadcom/vulkan/v3dv_pipeline.c +++ b/src/broadcom/vulkan/v3dv_pipeline.c @@ -32,6 +32,8 @@ #include "compiler/nir/nir_builder.h" +#include "util/u_atomic.h" + #include "vulkan/util/vk_format.h" #include "broadcom/cle/v3dx_pack.h" @@ -83,7 +85,16 @@ destroy_pipeline_stage(struct v3dv_device *device, struct v3dv_pipeline_stage *p_stage, const VkAllocationCallbacks *pAllocator) { - v3dv_bo_free(device, p_stage->assembly_bo); + hash_table_foreach(p_stage->cache, entry) { + struct v3dv_shader_variant *variant = entry->data; + + if (variant->assembly_bo) { + v3dv_bo_free(device, variant->assembly_bo); + variant->assembly_bo = NULL; + } + } + + _mesa_hash_table_destroy(p_stage->cache, NULL); vk_free2(&device->alloc, pAllocator, p_stage); } @@ -689,6 +700,26 @@ pipeline_populate_v3d_key(struct v3d_key *key, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct v3dv_pipeline_stage *p_stage) { + /* The following values are default values used at pipeline create, that + * lack the info about the real sampler/texture format used, needed to + * decide about lowerings and other stuff affecting the final + * assembly. When all that info is in place, it would be needed to check if + * it is needed a shader variant (if we are lucky the default values would + * be the same and no new compilation will be done) + */ + nir_shader *s = p_stage->nir; + + key->num_tex_used = s->info.num_textures; + for (uint32_t i = 0; i < s->info.num_textures; i++) { + key->tex[i].swizzle[0] = PIPE_SWIZZLE_X; + key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y; + key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z; + key->tex[i].swizzle[3] = PIPE_SWIZZLE_W; + + key->tex[i].return_size = 16; + key->tex[i].return_channels = 2; + } + /* default value. Would be override on the vs/gs populate methods when GS * gets supported */ @@ -888,14 +919,55 @@ pipeline_populate_v3d_vs_key(struct v3d_vs_key *key, key->num_used_outputs = 0; } else { struct v3dv_pipeline *pipeline = p_stage->pipeline; - key->num_used_outputs = pipeline->fs->prog_data.fs->num_inputs; + struct v3dv_shader_variant *fs_variant = pipeline->fs->current_variant; + + key->num_used_outputs = fs_variant->prog_data.fs->num_inputs; + STATIC_ASSERT(sizeof(key->used_outputs) == - sizeof(pipeline->fs->prog_data.fs->input_slots)); - memcpy(key->used_outputs, pipeline->fs->prog_data.fs->input_slots, + sizeof(fs_variant->prog_data.fs->input_slots)); + memcpy(key->used_outputs, fs_variant->prog_data.fs->input_slots, sizeof(key->used_outputs)); } } +/* FIXME: following hash/compare methods are C&P from v3d. Common place? */ +static uint32_t +fs_cache_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct v3d_fs_key)); +} + +static uint32_t +vs_cache_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct v3d_vs_key)); +} + +static bool +fs_cache_compare(const void *key1, const void *key2) +{ + return memcmp(key1, key2, sizeof(struct v3d_fs_key)) == 0; +} + +static bool +vs_cache_compare(const void *key1, const void *key2) +{ + return memcmp(key1, key2, sizeof(struct v3d_vs_key)) == 0; +} + +static struct hash_table* +create_variant_cache(gl_shader_stage stage) +{ + switch (stage) { + case MESA_SHADER_VERTEX: + return _mesa_hash_table_create(NULL, vs_cache_hash, vs_cache_compare); + case MESA_SHADER_FRAGMENT: + return _mesa_hash_table_create(NULL, fs_cache_hash, fs_cache_compare); + default: + unreachable("not supported shader stage"); + } +} + /* * Creates the pipeline_stage for the coordinate shader. Initially a clone of * the vs pipeline_stage, with is_coord to true; @@ -917,6 +989,11 @@ pipeline_stage_create_vs_bin(const struct v3dv_pipeline_stage *src, p_stage->module = src->module; p_stage->nir = src->nir; + /* Technically we could share the hash_table, but having their own makes + * destroy p_stage more straightforward + */ + p_stage->cache = create_variant_cache(MESA_SHADER_VERTEX); + p_stage->is_coord = true; return p_stage; @@ -924,14 +1001,15 @@ pipeline_stage_create_vs_bin(const struct v3dv_pipeline_stage *src, /* FIXME: right now this just asks for an bo for the exact size of the qpu * assembly. It would be good to be slighly smarter and having one "all - * shaders" bo per pipeline, so each p_stage would save their offset on - * such. That is really relevant due the fact that bo are always aligned to + * shaders" bo per pipeline, so each p_stage/variant would save their offset + * on such. That is really relevant due the fact that bo are always aligned to * 4096, so that would allow to use less memory. * * For now one-bo per-assembly would work. */ static void upload_assembly(struct v3dv_pipeline_stage *p_stage, + struct v3dv_shader_variant *variant, const void *data, uint32_t size) { @@ -939,7 +1017,7 @@ upload_assembly(struct v3dv_pipeline_stage *p_stage, /* We are uploading the assembly just once, so at this point we shouldn't * have any bo */ - assert(p_stage->assembly_bo == NULL); + assert(variant->assembly_bo == NULL); struct v3dv_device *device = p_stage->pipeline->device; switch (p_stage->stage) { @@ -971,32 +1049,41 @@ upload_assembly(struct v3dv_pipeline_stage *p_stage, v3dv_bo_unmap(device, bo); - p_stage->assembly_bo = bo; + variant->assembly_bo = bo; } -static void -compile_pipeline_stage(struct v3dv_pipeline_stage *p_stage) +/* For a given key, it returns the compiled version of the shader. If it was + * already compiled, it gets it from the p_stage cache, if not it compiles is + * through the v3d compiler + */ +static struct v3dv_shader_variant* +get_shader_variant(struct v3dv_pipeline_stage *p_stage, + struct v3d_key *key, + size_t key_size) { + struct hash_table *ht = p_stage->cache; + struct hash_entry *entry = _mesa_hash_table_search(ht, key); + + if (entry) + return entry->data; + + struct v3dv_device *device = p_stage->pipeline->device; + struct v3dv_shader_variant *variant = + vk_zalloc(&device->alloc, sizeof(*variant), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + struct v3dv_physical_device *physical_device = &p_stage->pipeline->device->instance->physicalDevice; const struct v3d_compiler *compiler = physical_device->compiler; - /* We don't support variants (and probably will never support them) */ - int variant_id = 0; - - /* Note that we are assigning program_id slightly differently that - * v3d. Here we are assigning one per pipeline stage, so vs and vs_bin - * would have a different program_id, while v3d would have the same for - * both. For the case of v3dv, it is more natural to have an id this way, - * as right now we are using it for debugging, not for shader-db. - */ - p_stage->program_id = physical_device->next_program_id++; + uint32_t variant_id = p_atomic_inc_return(&p_stage->compiled_variant_count); if (V3D_DEBUG & (V3D_DEBUG_NIR | v3d_debug_flag_for_shader_stage(p_stage->stage))) { - fprintf(stderr, "Just before v3d_compile: %s prog %d NIR:\n", + fprintf(stderr, "Just before v3d_compile: %s prog %d variant %d NIR:\n", gl_shader_stage_name(p_stage->stage), - p_stage->program_id); + p_stage->program_id, + variant_id); nir_print_shader(p_stage->nir, stderr); fprintf(stderr, "\n"); } @@ -1005,7 +1092,7 @@ compile_pipeline_stage(struct v3dv_pipeline_stage *p_stage) uint32_t qpu_insts_size; qpu_insts = v3d_compile(compiler, - &p_stage->key.base, &p_stage->prog_data.base, + key, &variant->prog_data.base, p_stage->nir, shader_debug_output, NULL, p_stage->program_id, @@ -1017,10 +1104,22 @@ compile_pipeline_stage(struct v3dv_pipeline_stage *p_stage) gl_shader_stage_name(p_stage->stage), p_stage->program_id); } else { - upload_assembly(p_stage, qpu_insts, qpu_insts_size); + upload_assembly(p_stage, variant, qpu_insts, qpu_insts_size); } free(qpu_insts); + + if (ht) { + struct v3d_key *dup_key; + dup_key = ralloc_size(ht, key_size); + memcpy(dup_key, key, key_size); + _mesa_hash_table_insert(ht, dup_key, variant); + } + + /* FIXME: pending provide scratch space for register spilling */ + assert(variant->prog_data.base->spill_size == 0); + + return variant; } /* FIXME: C&P from st, common place? */ @@ -1132,6 +1231,8 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline, { struct v3dv_pipeline_stage *stages[MESA_SHADER_STAGES] = { }; struct v3dv_device *device = pipeline->device; + struct v3dv_physical_device *physical_device = + &device->instance->physicalDevice; /* First pass to get the the common info from the shader and the nir * shader. We don't care of the coord shader for now. @@ -1144,6 +1245,16 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline, vk_zalloc2(&device->alloc, pAllocator, sizeof(*p_stage), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + /* Note that we are assigning program_id slightly differently that + * v3d. Here we are assigning one per pipeline stage, so vs and vs_bin + * would have a different program_id, while v3d would have the same for + * both. For the case of v3dv, it is more natural to have an id this way, + * as right now we are using it for debugging, not for shader-db. + */ + p_stage->program_id = physical_device->next_program_id++; + p_stage->compiled_variant_count = 0; + p_stage->cache = create_variant_cache(stage); + p_stage->pipeline = pipeline; p_stage->stage = stage; if (stage == MESA_SHADER_VERTEX) @@ -1178,6 +1289,10 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline, p_stage->module = 0; p_stage->nir = b.shader; + p_stage->program_id = physical_device->next_program_id++; + p_stage->compiled_variant_count = 0; + p_stage->cache = create_variant_cache(MESA_SHADER_FRAGMENT); + stages[MESA_SHADER_FRAGMENT] = p_stage; pipeline->active_stages |= MESA_SHADER_FRAGMENT; } @@ -1204,7 +1319,10 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline, next_stage = stages[stage]; } - /* Compiling to vir */ + /* Compiling to vir. Note that at this point we are compiling a default + * variant. Binding to textures, and other stuff (that would need a + * cmd_buffer) would need a recompile + */ for (int stage = MESA_SHADER_STAGES - 1; stage >= 0; stage--) { if (stages[stage] == NULL || stages[stage]->entrypoint == NULL) continue; @@ -1214,7 +1332,7 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline, pipeline_lower_nir(pipeline, p_stage, pipeline->layout); switch(stage) { - case MESA_SHADER_VERTEX: + case MESA_SHADER_VERTEX: { /* Right now we only support pipelines with both vertex and fragment * shader. */ @@ -1234,25 +1352,35 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline, lower_vs_io(p_stage->nir); /* Note that at this point we would compile twice, one for vs and - * other for vs_bin. For now we are maintaining two pipeline_stage - * and two keys. Eventually we could reuse the key. + * other for vs_bin. For now we are maintaining two pipeline_stages. + * + * FIXME: this leads to two caches, when it shouldnt, revisit */ - pipeline_populate_v3d_vs_key(&pipeline->vs->key.vs, pCreateInfo, pipeline->vs); - pipeline_populate_v3d_vs_key(&pipeline->vs_bin->key.vs, pCreateInfo, pipeline->vs_bin); + struct v3d_vs_key *key = &pipeline->vs->key.vs; + pipeline_populate_v3d_vs_key(key, pCreateInfo, pipeline->vs); + pipeline->vs->current_variant = + get_shader_variant(pipeline->vs, &key->base, sizeof(*key)); - compile_pipeline_stage(pipeline->vs); - compile_pipeline_stage(pipeline->vs_bin); + key = &pipeline->vs_bin->key.vs; + pipeline_populate_v3d_vs_key(key, pCreateInfo, pipeline->vs_bin); + pipeline->vs_bin->current_variant = + get_shader_variant(pipeline->vs_bin, &key->base, sizeof(*key)); break; - case MESA_SHADER_FRAGMENT: + } + case MESA_SHADER_FRAGMENT: { + struct v3d_fs_key *key = &p_stage->key.fs; + pipeline->fs = p_stage; - pipeline_populate_v3d_fs_key(&p_stage->key.fs, pCreateInfo, - p_stage); + pipeline_populate_v3d_fs_key(key, pCreateInfo, p_stage); lower_fs_io(p_stage->nir); - compile_pipeline_stage(pipeline->fs); + p_stage->current_variant = + get_shader_variant(p_stage, &key->base, sizeof(*key)); + break; + } default: unreachable("not supported shader stage"); } @@ -1263,11 +1391,13 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline, */ pipeline->vpm_cfg_bin.As = 1; pipeline->vpm_cfg_bin.Ve = 0; - pipeline->vpm_cfg_bin.Vc = pipeline->vs_bin->prog_data.vs->vcm_cache_size; + pipeline->vpm_cfg_bin.Vc = + pipeline->vs_bin->current_variant->prog_data.vs->vcm_cache_size; pipeline->vpm_cfg.As = 1; pipeline->vpm_cfg.Ve = 0; - pipeline->vpm_cfg.Vc = pipeline->vs->prog_data.vs->vcm_cache_size; + pipeline->vpm_cfg.Vc = + pipeline->vs->current_variant->prog_data.vs->vcm_cache_size; return VK_SUCCESS; } @@ -1720,6 +1850,16 @@ pack_shader_state_record(struct v3dv_pipeline *pipeline) assert(sizeof(pipeline->shader_state_record) == cl_packet_length(GL_SHADER_STATE_RECORD)); + struct v3d_fs_prog_data *prog_data_fs = + pipeline->fs->current_variant->prog_data.fs; + + struct v3d_vs_prog_data *prog_data_vs = + pipeline->vs->current_variant->prog_data.vs; + + struct v3d_vs_prog_data *prog_data_vs_bin = + pipeline->vs_bin->current_variant->prog_data.vs; + + /* Note: we are not packing addresses, as we need the job (see * cl_pack_emit_reloc). Additionally uniforms can't be filled up at this * point as they depend on dynamic info that can be set after create the @@ -1730,33 +1870,31 @@ pack_shader_state_record(struct v3dv_pipeline *pipeline) shader.enable_clipping = true; shader.point_size_in_shaded_vertex_data = - pipeline->vs->key.vs.per_vertex_point_size; + pipeline->vs->topology == PIPE_PRIM_POINTS; /* Must be set if the shader modifies Z, discards, or modifies * the sample mask. For any of these cases, the fragment * shader needs to write the Z value (even just discards). */ - shader.fragment_shader_does_z_writes = - pipeline->fs->prog_data.fs->writes_z; + shader.fragment_shader_does_z_writes = prog_data_fs->writes_z; /* Set if the EZ test must be disabled (due to shader side * effects and the early_z flag not being present in the * shader). */ - shader.turn_off_early_z_test = - pipeline->fs->prog_data.fs->disable_ez; + shader.turn_off_early_z_test = prog_data_fs->disable_ez; shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 = - pipeline->fs->prog_data.fs->uses_center_w; + prog_data_fs->uses_center_w; shader.any_shader_reads_hardware_written_primitive_id = false; shader.do_scoreboard_wait_on_first_thread_switch = - pipeline->fs->prog_data.fs->lock_scoreboard_on_first_thrsw; + prog_data_fs->lock_scoreboard_on_first_thrsw; shader.disable_implicit_point_line_varyings = - !pipeline->fs->prog_data.fs->uses_implicit_point_line_varyings; + !prog_data_fs->uses_implicit_point_line_varyings; shader.number_of_varyings_in_fragment_shader = - pipeline->fs->prog_data.fs->num_inputs; + prog_data_fs->num_inputs; shader.coordinate_shader_propagate_nans = true; shader.vertex_shader_propagate_nans = true; @@ -1771,21 +1909,21 @@ pack_shader_state_record(struct v3dv_pipeline *pipeline) * on v3d, see v3dx_draw). */ shader.coordinate_shader_has_separate_input_and_output_vpm_blocks = - pipeline->vs_bin->prog_data.vs->separate_segments; + prog_data_vs_bin->separate_segments; shader.vertex_shader_has_separate_input_and_output_vpm_blocks = - pipeline->vs->prog_data.vs->separate_segments; + prog_data_vs->separate_segments; shader.coordinate_shader_input_vpm_segment_size = - pipeline->vs_bin->prog_data.vs->separate_segments ? - pipeline->vs_bin->prog_data.vs->vpm_input_size : 1; + prog_data_vs_bin->separate_segments ? + prog_data_vs_bin->vpm_input_size : 1; shader.vertex_shader_input_vpm_segment_size = - pipeline->vs->prog_data.vs->separate_segments ? - pipeline->vs->prog_data.vs->vpm_input_size : 1; + prog_data_vs->separate_segments ? + prog_data_vs->vpm_input_size : 1; shader.coordinate_shader_output_vpm_segment_size = - pipeline->vs_bin->prog_data.vs->vpm_output_size; + prog_data_vs_bin->vpm_output_size; shader.vertex_shader_output_vpm_segment_size = - pipeline->vs->prog_data.vs->vpm_output_size; + prog_data_vs->vpm_output_size; /* Note: see previous note about adresses */ /* shader.coordinate_shader_uniforms_address */ @@ -1803,27 +1941,27 @@ pack_shader_state_record(struct v3dv_pipeline *pipeline) pipeline->vpm_cfg.Ve; shader.coordinate_shader_4_way_threadable = - pipeline->vs_bin->prog_data.vs->base.threads == 4; + prog_data_vs_bin->base.threads == 4; shader.vertex_shader_4_way_threadable = - pipeline->vs->prog_data.vs->base.threads == 4; + prog_data_vs->base.threads == 4; shader.fragment_shader_4_way_threadable = - pipeline->fs->prog_data.fs->base.threads == 4; + prog_data_fs->base.threads == 4; shader.coordinate_shader_start_in_final_thread_section = - pipeline->vs_bin->prog_data.vs->base.single_seg; + prog_data_vs_bin->base.single_seg; shader.vertex_shader_start_in_final_thread_section = - pipeline->vs->prog_data.vs->base.single_seg; + prog_data_vs->base.single_seg; shader.fragment_shader_start_in_final_thread_section = - pipeline->fs->prog_data.fs->base.single_seg; + prog_data_fs->base.single_seg; shader.vertex_id_read_by_coordinate_shader = - pipeline->vs_bin->prog_data.vs->uses_vid; + prog_data_vs_bin->uses_vid; shader.instance_id_read_by_coordinate_shader = - pipeline->vs_bin->prog_data.vs->uses_iid; + prog_data_vs_bin->uses_iid; shader.vertex_id_read_by_vertex_shader = - pipeline->vs->prog_data.vs->uses_vid; + prog_data_vs->uses_vid; shader.instance_id_read_by_vertex_shader = - pipeline->vs->prog_data.vs->uses_iid; + prog_data_vs->uses_iid; /* Note: see previous note about adresses */ /* shader.address_of_default_attribute_values */ diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h index 61254d52e93..61778d7fc7f 100644 --- a/src/broadcom/vulkan/v3dv_private.h +++ b/src/broadcom/vulkan/v3dv_private.h @@ -779,6 +779,20 @@ vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage) return ffs(vk_stage) - 1; } +struct v3dv_shader_variant { + union { + struct v3d_prog_data *base; + struct v3d_vs_prog_data *vs; + struct v3d_fs_prog_data *fs; + } prog_data; + + /* FIXME: using one bo per shader. Eventually we would be interested on + * reusing the same bo for all the shaders, like a bo per v3dv_pipeline for + * shaders. + */ + struct v3dv_bo *assembly_bo; +}; + /* * Per-stage info for each stage, useful so shader_module_compile_to_nir and * other methods doesn't have so many parameters. @@ -805,29 +819,30 @@ struct v3dv_pipeline_stage { /** A name for this program, so you can track it in shader-db output. */ uint32_t program_id; + /** How many variants of this program were compiled, for shader-db. */ + uint32_t compiled_variant_count; + /* The following are the default v3d_key populated using + * VkCreateGraphicsPipelineCreateInfo. Variants will be created tweaking + * them, so we don't need to maintain a copy of that create info struct + * around + */ union { struct v3d_key base; struct v3d_vs_key vs; struct v3d_fs_key fs; } key; - union { - struct v3d_prog_data *base; - struct v3d_vs_prog_data *vs; - struct v3d_fs_prog_data *fs; - } prog_data; + /* Cache with all the shader variant. + */ + struct hash_table *cache; + + struct v3dv_shader_variant *current_variant; /* FIXME: only make sense on vs, so perhaps a v3dv key like radv? or a kind * of pipe_draw_info */ enum pipe_prim_type topology; - - /* FIXME: using one bo per shader. Eventually we would be interested on - * reusing the same bo for all the shaders, like a bo per v3dv_pipeline for - * shaders. - */ - struct v3dv_bo *assembly_bo; }; /* FIXME: although the full vpm_config is not required at this point, as we diff --git a/src/broadcom/vulkan/v3dv_uniforms.c b/src/broadcom/vulkan/v3dv_uniforms.c index b4bfe421619..7b4dad84e9f 100644 --- a/src/broadcom/vulkan/v3dv_uniforms.c +++ b/src/broadcom/vulkan/v3dv_uniforms.c @@ -247,7 +247,8 @@ struct v3dv_cl_reloc v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_pipeline_stage *p_stage) { - struct v3d_uniform_list *uinfo = &p_stage->prog_data.base->uniforms; + struct v3d_uniform_list *uinfo = + &p_stage->current_variant->prog_data.base->uniforms; struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic; struct v3dv_pipeline *pipeline = p_stage->pipeline;