diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index d48545e397c..6953afc5db9 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -264,6 +264,8 @@ struct tu_pipeline_builder const VkGraphicsPipelineCreateInfo *create_info; struct tu_shader *shaders[MESA_SHADER_STAGES]; + struct ir3_shader_variant *variants[MESA_SHADER_STAGES]; + struct ir3_shader_variant *binning_variant; uint32_t shader_offsets[MESA_SHADER_STAGES]; uint32_t binning_vs_offset; uint32_t shader_total_size; @@ -1387,13 +1389,6 @@ tu6_emit_geometry_consts(struct tu_cs *cs, ARRAY_SIZE(params), params); } -/* get pointer to first variant, return NULL if shader is NULL */ -static const struct ir3_shader_variant * -tu_shader_get_variant(const struct tu_shader *shader) -{ - return shader ? &shader->variants[0] : NULL; -} - static void tu6_emit_program(struct tu_cs *cs, struct tu_pipeline_builder *builder, @@ -1401,12 +1396,10 @@ tu6_emit_program(struct tu_cs *cs, bool binning_pass, struct tu_streamout_state *tf) { - const struct ir3_shader_variant *vs = - tu_shader_get_variant(builder->shaders[MESA_SHADER_VERTEX]); - const struct ir3_shader_variant *gs = - tu_shader_get_variant(builder->shaders[MESA_SHADER_GEOMETRY]); - const struct ir3_shader_variant *fs = - tu_shader_get_variant(builder->shaders[MESA_SHADER_FRAGMENT]); + const struct ir3_shader_variant *vs = builder->variants[MESA_SHADER_VERTEX]; + const struct ir3_shader_variant *bs = builder->binning_variant; + const struct ir3_shader_variant *gs = builder->variants[MESA_SHADER_GEOMETRY]; + const struct ir3_shader_variant *fs = builder->variants[MESA_SHADER_FRAGMENT]; gl_shader_stage stage = MESA_SHADER_VERTEX; STATIC_ASSERT(MESA_SHADER_VERTEX == 0); @@ -1414,24 +1407,18 @@ tu6_emit_program(struct tu_cs *cs, tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 1); tu_cs_emit(cs, 0xff); /* XXX */ - /* if we have streamout, use full VS in binning pass, as the - * binning pass VS will 
have outputs on other than position/psize - * stripped out - * - * GS also can have streamout, but we completely disable the - * the binning pass variant when GS is present because we don't - * support compiling correct binning pass variants with GS + /* Don't use the binning pass variant when GS is present because we don't + * support compiling correct binning pass variants with GS. */ - if (binning_pass && vs->shader->stream_output.num_outputs == 0 && !gs) { - vs = &builder->shaders[MESA_SHADER_VERTEX]->variants[1]; - tu6_emit_xs_config(cs, stage, vs, + if (binning_pass && !gs) { + vs = bs; + tu6_emit_xs_config(cs, stage, bs, binary_bo->iova + builder->binning_vs_offset); stage++; } for (; stage < ARRAY_SIZE(builder->shaders); stage++) { - const struct ir3_shader_variant *xs = - tu_shader_get_variant(builder->shaders[stage]); + const struct ir3_shader_variant *xs = builder->variants[stage]; if (stage == MESA_SHADER_FRAGMENT && binning_pass) fs = xs = NULL; @@ -1978,10 +1965,8 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder) struct tu_shader_compile_options options; tu_shader_compile_options_init(&options, builder->create_info); - /* compile shaders in reverse order */ - struct tu_shader *next_stage_shader = NULL; - for (gl_shader_stage stage = MESA_SHADER_STAGES - 1; - stage > MESA_SHADER_NONE; stage--) { + for (gl_shader_stage stage = MESA_SHADER_VERTEX; + stage < MESA_SHADER_STAGES; stage++) { const VkPipelineShaderStageCreateInfo *stage_info = stage_infos[stage]; if (!stage_info && stage != MESA_SHADER_FRAGMENT) continue; @@ -1992,32 +1977,44 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder) if (!shader) return VK_ERROR_OUT_OF_HOST_MEMORY; - VkResult result = - tu_shader_compile(builder->device, shader, next_stage_shader, - &options, builder->alloc); - if (result != VK_SUCCESS) - return result; - builder->shaders[stage] = shader; - builder->shader_offsets[stage] = builder->shader_total_size; - 
builder->shader_total_size += - sizeof(uint32_t) * shader->variants[0].info.sizedwords; - - next_stage_shader = shader; } - if (builder->shaders[MESA_SHADER_VERTEX]->has_binning_pass) { - const struct tu_shader *vs = builder->shaders[MESA_SHADER_VERTEX]; - const struct ir3_shader_variant *variant; + for (gl_shader_stage stage = MESA_SHADER_STAGES - 1; + stage > MESA_SHADER_NONE; stage--) { + if (!builder->shaders[stage]) + continue; + + bool created; + builder->variants[stage] = + ir3_shader_get_variant(builder->shaders[stage]->ir3_shader, + &options.key, false, &created); + if (!builder->variants[stage]) + return VK_ERROR_OUT_OF_HOST_MEMORY; - if (vs->ir3_shader.stream_output.num_outputs) - variant = &vs->variants[0]; - else - variant = &vs->variants[1]; + builder->shader_offsets[stage] = builder->shader_total_size; + builder->shader_total_size += + sizeof(uint32_t) * builder->variants[stage]->info.sizedwords; + } + + if (options.include_binning_pass) { + const struct tu_shader *vs = builder->shaders[MESA_SHADER_VERTEX]; + struct ir3_shader_variant *variant; + + if (vs->ir3_shader->stream_output.num_outputs) { + variant = builder->variants[MESA_SHADER_VERTEX]; + } else { + bool created; + variant = ir3_shader_get_variant(vs->ir3_shader, &options.key, + true, &created); + if (!variant) + return VK_ERROR_OUT_OF_HOST_MEMORY; + } builder->binning_vs_offset = builder->shader_total_size; builder->shader_total_size += sizeof(uint32_t) * variant->info.sizedwords; + builder->binning_variant = variant; } return VK_SUCCESS; @@ -2039,28 +2036,17 @@ tu_pipeline_builder_upload_shaders(struct tu_pipeline_builder *builder, return result; for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) { - const struct tu_shader *shader = builder->shaders[i]; - if (!shader) + const struct ir3_shader_variant *variant = builder->variants[i]; + if (!variant) continue; - memcpy(bo->map + builder->shader_offsets[i], shader->binary, - sizeof(uint32_t) * shader->variants[0].info.sizedwords); + 
memcpy(bo->map + builder->shader_offsets[i], variant->bin, + sizeof(uint32_t) * variant->info.sizedwords); } - if (builder->shaders[MESA_SHADER_VERTEX]->has_binning_pass) { - const struct tu_shader *vs = builder->shaders[MESA_SHADER_VERTEX]; - const struct ir3_shader_variant *variant; - void *bin; - - if (vs->ir3_shader.stream_output.num_outputs) { - variant = &vs->variants[0]; - bin = vs->binary; - } else { - variant = &vs->variants[1]; - bin = vs->binning_binary; - } - - memcpy(bo->map + builder->binning_vs_offset, bin, + if (builder->binning_variant) { + const struct ir3_shader_variant *variant = builder->binning_variant; + memcpy(bo->map + builder->binning_vs_offset, variant->bin, sizeof(uint32_t) * variant->info.sizedwords); } @@ -2120,7 +2106,7 @@ tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder, tu_pipeline_set_linkage(&pipeline->program.link[i], builder->shaders[i], - &builder->shaders[i]->variants[0]); + builder->variants[i]); desc_sets |= builder->shaders[i]->active_desc_sets; } pipeline->active_desc_sets = desc_sets; @@ -2138,20 +2124,21 @@ tu_pipeline_builder_parse_vertex_input(struct tu_pipeline_builder *builder, { const VkPipelineVertexInputStateCreateInfo *vi_info = builder->create_info->pVertexInputState; - const struct tu_shader *vs = builder->shaders[MESA_SHADER_VERTEX]; + const struct ir3_shader_variant *vs = builder->variants[MESA_SHADER_VERTEX]; + const struct ir3_shader_variant *bs = builder->binning_variant; struct tu_cs vi_cs; tu_cs_begin_sub_stream(&pipeline->cs, MAX_VERTEX_ATTRIBS * 7 + 2, &vi_cs); - tu6_emit_vertex_input(&vi_cs, &vs->variants[0], vi_info, + tu6_emit_vertex_input(&vi_cs, vs, vi_info, &pipeline->vi.bindings_used); pipeline->vi.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &vi_cs); - if (vs->has_binning_pass) { + if (bs) { tu_cs_begin_sub_stream(&pipeline->cs, MAX_VERTEX_ATTRIBS * 7 + 2, &vi_cs); tu6_emit_vertex_input( - &vi_cs, &vs->variants[1], vi_info, &pipeline->vi.bindings_used); + &vi_cs, 
bs, vi_info, &pipeline->vi.bindings_used); pipeline->vi.binning_state_ib = tu_cs_end_sub_stream(&pipeline->cs, &vi_cs); } @@ -2524,11 +2511,10 @@ tu_CreateGraphicsPipelines(VkDevice device, static VkResult tu_compute_upload_shader(VkDevice device, struct tu_pipeline *pipeline, - struct tu_shader *shader) + struct ir3_shader_variant *v) { TU_FROM_HANDLE(tu_device, dev, device); struct tu_bo *bo = &pipeline->program.binary_bo; - struct ir3_shader_variant *v = &shader->variants[0]; uint32_t shader_size = sizeof(uint32_t) * v->info.sizedwords; VkResult result = @@ -2540,7 +2526,7 @@ tu_compute_upload_shader(VkDevice device, if (result != VK_SUCCESS) return result; - memcpy(bo->map, shader->binary, shader_size); + memcpy(bo->map, v->bin, shader_size); return VK_SUCCESS; } @@ -2578,16 +2564,19 @@ tu_compute_pipeline_create(VkDevice device, goto fail; } - result = tu_shader_compile(dev, shader, NULL, &options, pAllocator); - if (result != VK_SUCCESS) + bool created; + struct ir3_shader_variant *v = + ir3_shader_get_variant(shader->ir3_shader, &options.key, false, &created); + if (!v) { + /* get_variant signals failure only by returning NULL; set an error code before bailing out */ + result = VK_ERROR_OUT_OF_HOST_MEMORY; goto fail; + } - struct ir3_shader_variant *v = &shader->variants[0]; - tu_pipeline_set_linkage(&pipeline->program.link[MESA_SHADER_COMPUTE], shader, v); - result = tu_compute_upload_shader(device, pipeline, shader); + result = tu_compute_upload_shader(device, pipeline, v); if (result != VK_SUCCESS) goto fail; diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index 6006df36cdd..9dbf4c50e35 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -1114,21 +1114,11 @@ struct tu_push_constant_range struct tu_shader { - struct ir3_shader ir3_shader; + struct ir3_shader *ir3_shader; struct tu_push_constant_range push_consts; unsigned attachment_idx[MAX_RTS]; uint8_t active_desc_sets; - - /* This may be true for vertex shaders. When true, variants[1] is the - * binning variant and binning_binary is non-NULL.
- */ - bool has_binning_pass; - - void *binary; - void *binning_binary; - - struct ir3_shader_variant variants[0]; }; struct tu_shader * @@ -1148,13 +1138,6 @@ tu_shader_compile_options_init( struct tu_shader_compile_options *options, const VkGraphicsPipelineCreateInfo *pipeline_info); -VkResult -tu_shader_compile(struct tu_device *dev, - struct tu_shader *shader, - const struct tu_shader *next_stage, - const struct tu_shader_compile_options *options, - const VkAllocationCallbacks *alloc); - struct tu_program_descriptor_linkage { struct ir3_ubo_analysis_state ubo_state; diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c index df99fe6cd9c..1717a8bd1c1 100644 --- a/src/freedreno/vulkan/tu_shader.c +++ b/src/freedreno/vulkan/tu_shader.c @@ -423,7 +423,6 @@ gather_push_constants(nir_shader *shader, struct tu_shader *tu_shader) if (min >= max) { tu_shader->push_consts.lo = 0; tu_shader->push_consts.count = 0; - tu_shader->ir3_shader.const_state.num_reserved_user_consts = 0; return; } @@ -434,8 +433,6 @@ gather_push_constants(nir_shader *shader, struct tu_shader *tu_shader) tu_shader->push_consts.lo = (min / 16) / 4 * 4; tu_shader->push_consts.count = align(max, 16) / 16 - tu_shader->push_consts.lo; - tu_shader->ir3_shader.const_state.num_reserved_user_consts = - align(tu_shader->push_consts.count, 4); } /* Gather the InputAttachmentIndex for each input attachment from the NIR @@ -495,9 +492,8 @@ tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader, } static void -tu_gather_xfb_info(nir_shader *nir, struct tu_shader *shader) +tu_gather_xfb_info(nir_shader *nir, struct ir3_stream_output_info *info) { - struct ir3_stream_output_info *info = &shader->ir3_shader.stream_output; nir_xfb_info *xfb = nir_gather_xfb_info(nir, NULL); if (!xfb) @@ -545,10 +541,9 @@ tu_shader_create(struct tu_device *dev, { struct tu_shader *shader; - const uint32_t max_variant_count = (stage == MESA_SHADER_VERTEX) ? 
2 : 1; shader = vk_zalloc2( &dev->alloc, alloc, - sizeof(*shader) + sizeof(struct ir3_shader_variant) * max_variant_count, + sizeof(*shader), 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); if (!shader) return NULL; @@ -609,10 +604,11 @@ tu_shader_create(struct tu_device *dev, * Also needs to be called after nir_remove_dead_variables with varyings, * so that we could align stream outputs correctly. */ + struct ir3_stream_output_info so_info = {}; if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL || nir->info.stage == MESA_SHADER_GEOMETRY) - tu_gather_xfb_info(nir, shader); + tu_gather_xfb_info(nir, &so_info); NIR_PASS_V(nir, nir_propagate_invariant); @@ -639,31 +635,14 @@ tu_shader_create(struct tu_device *dev, if (stage == MESA_SHADER_FRAGMENT) NIR_PASS_V(nir, nir_lower_input_attachments, true); - if (stage == MESA_SHADER_GEOMETRY) - NIR_PASS_V(nir, ir3_nir_lower_gs); - NIR_PASS_V(nir, tu_lower_io, shader, layout); - NIR_PASS_V(nir, nir_lower_io, nir_var_all, ir3_glsl_type_size, 0); - - if (stage == MESA_SHADER_FRAGMENT) { - /* NOTE: lower load_barycentric_at_sample first, since it - * produces load_barycentric_at_offset: - */ - NIR_PASS_V(nir, ir3_nir_lower_load_barycentric_at_sample); - NIR_PASS_V(nir, ir3_nir_lower_load_barycentric_at_offset); - - NIR_PASS_V(nir, ir3_nir_move_varying_inputs); - } - nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); - /* num_uniforms only used by ir3 for size of ubo 0 (push constants) */ - nir->num_uniforms = MAX_PUSH_CONSTANTS_SIZE / 16; - - shader->ir3_shader.compiler = dev->compiler; - shader->ir3_shader.type = stage; - shader->ir3_shader.nir = nir; + shader->ir3_shader = + ir3_shader_from_nir(dev->compiler, nir, + align(shader->push_consts.count, 4), + &so_info); return shader; } @@ -673,20 +652,7 @@ tu_shader_destroy(struct tu_device *dev, struct tu_shader *shader, const VkAllocationCallbacks *alloc) { - if (shader->ir3_shader.nir) - ralloc_free(shader->ir3_shader.nir); - - for 
(uint32_t i = 0; i < 1 + shader->has_binning_pass; i++) { - if (shader->variants[i].ir) - ir3_destroy(shader->variants[i].ir); - } - - if (shader->ir3_shader.const_state.immediates) - free(shader->ir3_shader.const_state.immediates); - if (shader->binary) - free(shader->binary); - if (shader->binning_binary) - free(shader->binning_binary); + ir3_shader_destroy(shader->ir3_shader); vk_free2(&dev->alloc, alloc, shader); } @@ -732,95 +698,6 @@ tu_shader_compile_options_init( }; } -static uint32_t * -tu_compile_shader_variant(struct ir3_shader *shader, - const struct ir3_shader_key *key, - struct ir3_shader_variant *nonbinning, - struct ir3_shader_variant *variant) -{ - variant->shader = shader; - variant->type = shader->type; - variant->key = *key; - variant->binning_pass = !!nonbinning; - variant->nonbinning = nonbinning; - - int ret = ir3_compile_shader_nir(shader->compiler, variant); - if (ret) - return NULL; - - /* when assemble fails, we rely on tu_shader_destroy to clean up the - * variant - */ - return ir3_shader_assemble(variant, shader->compiler->gpu_id); -} - -VkResult -tu_shader_compile(struct tu_device *dev, - struct tu_shader *shader, - const struct tu_shader *next_stage, - const struct tu_shader_compile_options *options, - const VkAllocationCallbacks *alloc) -{ - if (options->optimize) { - /* ignore the key for the first pass of optimization */ - ir3_optimize_nir(&shader->ir3_shader, shader->ir3_shader.nir, NULL); - - if (unlikely(dev->physical_device->instance->debug_flags & - TU_DEBUG_NIR)) { - fprintf(stderr, "optimized nir:\n"); - nir_print_shader(shader->ir3_shader.nir, stderr); - } - } - - shader->binary = tu_compile_shader_variant( - &shader->ir3_shader, &options->key, NULL, &shader->variants[0]); - if (!shader->binary) - return VK_ERROR_OUT_OF_HOST_MEMORY; - - if (shader_debug_enabled(shader->ir3_shader.type)) { - fprintf(stdout, "Native code for unnamed %s shader %s:\n", - ir3_shader_stage(&shader->variants[0]), 
shader->ir3_shader.nir->info.name); - if (shader->ir3_shader.type == MESA_SHADER_FRAGMENT) - fprintf(stdout, "SIMD0\n"); - ir3_shader_disasm(&shader->variants[0], shader->binary, stdout); - } - - /* compile another variant for the binning pass */ - if (options->include_binning_pass && - shader->ir3_shader.type == MESA_SHADER_VERTEX) { - shader->binning_binary = tu_compile_shader_variant( - &shader->ir3_shader, &options->key, &shader->variants[0], - &shader->variants[1]); - if (!shader->binning_binary) - return VK_ERROR_OUT_OF_HOST_MEMORY; - - shader->has_binning_pass = true; - - if (shader_debug_enabled(MESA_SHADER_VERTEX)) { - fprintf(stdout, "Native code for unnamed binning shader %s:\n", - shader->ir3_shader.nir->info.name); - ir3_shader_disasm(&shader->variants[1], shader->binary, stdout); - } - } - - if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_IR3)) { - fprintf(stderr, "disassembled ir3:\n"); - fprintf(stderr, "shader: %s\n", - gl_shader_stage_name(shader->ir3_shader.type)); - ir3_shader_disasm(&shader->variants[0], shader->binary, stderr); - - if (shader->has_binning_pass) { - fprintf(stderr, "disassembled ir3:\n"); - fprintf(stderr, "shader: %s (binning)\n", - gl_shader_stage_name(shader->ir3_shader.type)); - ir3_shader_disasm(&shader->variants[1], shader->binning_binary, - stderr); - } - } - - return VK_SUCCESS; -} - VkResult tu_CreateShaderModule(VkDevice _device, const VkShaderModuleCreateInfo *pCreateInfo,