panvk: Simplify shader initialization in the pipeline logic

The multi-step compilation is not needed since we don't have clever linking optimization tricks just yet. Let's handle compilation, shader upload and renderer state descriptor emission in one step and make as much as we can stage-agnostic. The remaining FS-specific stuff is moved to init_fs_state(). While at it, move as much information as we can to the panvk_pipeline_shader object we created, to make the transition to vk_shader easier. We also stop using MESA_SHADER_STAGES-sized arrays everywhere, since we only support vertex and fragment shaders. Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com> Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28927>
2024-04-24 14:15:32 +02:00
parent e7a9bd1cbe
commit dfbec67cb0
3 changed files with 162 additions and 232 deletions
@@ -25,6 +25,13 @@

 #define MAX_RTS 8

+struct panvk_pipeline_shader {
+   mali_ptr code;
+   mali_ptr rsd;
+   struct pan_shader_info info;
+   bool has_img_access;
+};
+
 enum panvk_pipeline_type {
   PANVK_PIPELINE_GRAPHICS,
   PANVK_PIPELINE_COMPUTE,
@@ -38,15 +45,6 @@ struct panvk_pipeline {

   struct panvk_pool bin_pool;
   struct panvk_pool desc_pool;
-
-   unsigned active_stages;
-
-   uint64_t rsds[MESA_SHADER_STAGES];
-
-   /* shader stage bit is set of the stage accesses storage images */
-   uint32_t img_access_mask;
-
-   unsigned tls_size;
 };

 VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_pipeline, base, VkPipeline,
@@ -55,16 +53,13 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_pipeline, base, VkPipeline,
 struct panvk_graphics_pipeline {
   struct panvk_pipeline base;

+   struct panvk_pipeline_shader vs;
+   struct panvk_pipeline_shader fs;
+
   struct panvk_varyings_info varyings;

   struct {
      struct {
-         bool writes_point_size;
-      } vs;
-
-      struct {
-         uint64_t address;
-         struct pan_shader_info info;
         bool required;
         bool dynamic_rsd;
         uint8_t rt_mask;
@@ -100,8 +95,9 @@ panvk_pipeline_to_graphics_pipeline(struct panvk_pipeline *pipeline)
 struct panvk_compute_pipeline {
   struct panvk_pipeline base;

+   struct panvk_pipeline_shader cs;
+
   struct pan_compute_dim local_size;
-   unsigned wls_size;
 };

 static struct panvk_compute_pipeline *
@@ -535,7 +535,7 @@ panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf,
   const struct panvk_graphics_pipeline *pipeline = cmdbuf->state.gfx.pipeline;

   if (!pipeline->state.fs.dynamic_rsd) {
-      draw->fs_rsd = pipeline->base.rsds[MESA_SHADER_FRAGMENT];
+      draw->fs_rsd = pipeline->fs.rsd;
      return;
   }

@@ -708,7 +708,7 @@ panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf,
   const struct vk_input_assembly_state *ia =
      &cmdbuf->vk.dynamic_graphics_state.ia;
   bool writes_point_size =
-      pipeline->state.vs.writes_point_size &&
+      pipeline->vs.info.vs.writes_point_size &&
      ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST;

   for (unsigned i = 0, buf_idx = 0; i < PANVK_VARY_BUF_MAX; i++) {
@@ -806,7 +806,7 @@ panvk_prepare_img_attribs(struct panvk_cmd_buffer *cmdbuf,
                          struct panvk_descriptor_state *desc_state,
                          const struct panvk_pipeline *pipeline)
 {
-   if (desc_state->img.attribs || !pipeline->img_access_mask)
+   if (desc_state->img.attribs)
      return;

   unsigned attrib_count = pipeline->layout->num_imgs;
@@ -925,9 +925,7 @@ panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
   const struct vk_vertex_input_state *vi =
      cmdbuf->vk.dynamic_graphics_state.vi;
   unsigned num_imgs =
-      pipeline->base.img_access_mask & BITFIELD_BIT(MESA_SHADER_VERTEX)
-         ? pipeline->base.layout->num_imgs
-         : 0;
+      pipeline->vs.has_img_access ? pipeline->base.layout->num_imgs : 0;
   unsigned num_vs_attribs = util_last_bit(vi->attributes_valid);
   unsigned num_vbs = util_last_bit(vi->bindings_valid);
   unsigned attrib_count =
@@ -1004,16 +1002,16 @@ panvk_draw_prepare_attributes(struct panvk_cmd_buffer *cmdbuf,
   struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state;
   const struct panvk_graphics_pipeline *pipeline = cmdbuf->state.gfx.pipeline;

-   for (unsigned i = 0; i < ARRAY_SIZE(draw->stages); i++) {
-      if (i == MESA_SHADER_VERTEX) {
-         panvk_draw_prepare_vs_attribs(cmdbuf, draw);
-         draw->stages[i].attributes = cmdbuf->state.gfx.vs.attribs;
-         draw->stages[i].attribute_bufs = cmdbuf->state.gfx.vs.attrib_bufs;
-      } else if (pipeline->base.img_access_mask & BITFIELD_BIT(i)) {
-         panvk_prepare_img_attribs(cmdbuf, desc_state, &pipeline->base);
-         draw->stages[i].attributes = desc_state->img.attribs;
-         draw->stages[i].attribute_bufs = desc_state->img.attrib_bufs;
-      }
+   panvk_draw_prepare_vs_attribs(cmdbuf, draw);
+   draw->stages[MESA_SHADER_VERTEX].attributes = cmdbuf->state.gfx.vs.attribs;
+   draw->stages[MESA_SHADER_VERTEX].attribute_bufs =
+      cmdbuf->state.gfx.vs.attrib_bufs;
+
+   if (pipeline->fs.has_img_access) {
+      panvk_prepare_img_attribs(cmdbuf, desc_state, &pipeline->base);
+      draw->stages[MESA_SHADER_FRAGMENT].attributes = desc_state->img.attribs;
+      draw->stages[MESA_SHADER_FRAGMENT].attribute_bufs =
+         desc_state->img.attrib_bufs;
   }
 }

@@ -1092,7 +1090,7 @@ panvk_draw_prepare_vertex_job(struct panvk_cmd_buffer *cmdbuf,
   }

   pan_section_pack(ptr.cpu, COMPUTE_JOB, DRAW, cfg) {
-      cfg.state = pipeline->base.rsds[MESA_SHADER_VERTEX];
+      cfg.state = pipeline->vs.rsd;
      cfg.attributes = draw->stages[MESA_SHADER_VERTEX].attributes;
      cfg.attribute_buffers = draw->stages[MESA_SHADER_VERTEX].attribute_bufs;
      cfg.varyings = draw->stages[MESA_SHADER_VERTEX].varyings;
@@ -1142,7 +1140,7 @@ panvk_emit_tiler_primitive(struct panvk_cmd_buffer *cmdbuf,
   const struct vk_input_assembly_state *ia =
      &cmdbuf->vk.dynamic_graphics_state.ia;
   bool writes_point_size =
-      pipeline->state.vs.writes_point_size &&
+      pipeline->vs.info.vs.writes_point_size &&
      ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST;

   pan_pack(prim, PRIMITIVE, cfg) {
@@ -1189,7 +1187,7 @@ panvk_emit_tiler_primitive_size(struct panvk_cmd_buffer *cmdbuf,
   const struct vk_input_assembly_state *ia =
      &cmdbuf->vk.dynamic_graphics_state.ia;
   bool writes_point_size =
-      pipeline->state.vs.writes_point_size &&
+      pipeline->vs.info.vs.writes_point_size &&
      ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST;

   pan_pack(primsz, PRIMITIVE_SIZE, cfg) {
@@ -1367,7 +1365,8 @@ panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
   panvk_draw_prepare_vertex_job(cmdbuf, draw);
   panvk_draw_prepare_tiler_job(cmdbuf, draw);
   batch->tlsinfo.tls.size =
-      MAX2(pipeline->base.tls_size, batch->tlsinfo.tls.size);
+      MAX3(pipeline->vs.info.tls_size, pipeline->fs.info.tls_size,
+           batch->tlsinfo.tls.size);

   unsigned vjob_id =
      pan_jc_add_job(&cmdbuf->desc_pool.base, &batch->jc, MALI_JOB_TYPE_VERTEX,
@@ -1779,7 +1778,10 @@ panvk_per_arch(CmdDispatch)(VkCommandBuffer commandBuffer, uint32_t x,
   dispatch.tsd = batch->tls.gpu;

   panvk_cmd_prepare_push_sets(cmdbuf, desc_state, &pipeline->base);
-   panvk_prepare_img_attribs(cmdbuf, desc_state, &pipeline->base);
+
+   if (pipeline->cs.has_img_access)
+      panvk_prepare_img_attribs(cmdbuf, desc_state, &pipeline->base);
+
   dispatch.attributes = desc_state->img.attribs;
   dispatch.attribute_bufs = desc_state->img.attrib_bufs;

@@ -1809,7 +1811,7 @@ panvk_per_arch(CmdDispatch)(VkCommandBuffer commandBuffer, uint32_t x,
   }

   pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) {
-      cfg.state = pipeline->base.rsds[MESA_SHADER_COMPUTE];
+      cfg.state = pipeline->cs.rsd;
      cfg.attributes = dispatch.attributes;
      cfg.attribute_buffers = dispatch.attribute_bufs;
      cfg.thread_storage = dispatch.tsd;
@@ -1822,8 +1824,8 @@ panvk_per_arch(CmdDispatch)(VkCommandBuffer commandBuffer, uint32_t x,
   pan_jc_add_job(&cmdbuf->desc_pool.base, &batch->jc, MALI_JOB_TYPE_COMPUTE,
                  false, false, 0, 0, &job, false);

-   batch->tlsinfo.tls.size = pipeline->base.tls_size;
-   batch->tlsinfo.wls.size = pipeline->wls_size;
+   batch->tlsinfo.tls.size = pipeline->cs.info.tls_size;
+   batch->tlsinfo.wls.size = pipeline->cs.info.wls_size;
   if (batch->tlsinfo.wls.size) {
      unsigned core_id_range;

@@ -52,20 +52,6 @@
 #include "pan_earlyzs.h"
 #include "pan_shader.h"

-static void
-release_shaders(struct panvk_pipeline *pipeline, struct panvk_shader **shaders,
-                const VkAllocationCallbacks *alloc)
-{
-   struct panvk_device *dev = to_panvk_device(pipeline->base.device);
-
-   for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
-      if (!shaders[i])
-         continue;
-
-      panvk_per_arch(shader_destroy)(dev, shaders[i], alloc);
-   }
-}
-
 static bool
 dyn_state_is_set(const struct panvk_graphics_pipeline *pipeline, uint32_t id)
 {
@@ -75,72 +61,6 @@ dyn_state_is_set(const struct panvk_graphics_pipeline *pipeline, uint32_t id)
   return BITSET_TEST(pipeline->state.dynamic.set, id);
 }

-static VkResult
-compile_shaders(struct panvk_pipeline *pipeline,
-                const VkPipelineShaderStageCreateInfo *stages,
-                uint32_t stage_count, const VkAllocationCallbacks *alloc,
-                struct panvk_shader **shaders)
-{
-   struct panvk_device *dev = to_panvk_device(pipeline->base.device);
-   struct panvk_graphics_pipeline *gfx_pipeline =
-      panvk_pipeline_to_graphics_pipeline(pipeline);
-   const VkPipelineShaderStageCreateInfo *stage_infos[MESA_SHADER_STAGES] = {
-      NULL,
-   };
-
-   for (uint32_t i = 0; i < stage_count; i++) {
-      gl_shader_stage stage = vk_to_mesa_shader_stage(stages[i].stage);
-      stage_infos[stage] = &stages[i];
-   }
-
-   /* compile shaders in reverse order */
-   for (gl_shader_stage stage = MESA_SHADER_STAGES - 1;
-        stage > MESA_SHADER_NONE; stage--) {
-      const VkPipelineShaderStageCreateInfo *stage_info = stage_infos[stage];
-      if (!stage_info)
-         continue;
-
-      struct panvk_shader *shader;
-
-      shader = panvk_per_arch(shader_create)(
-         dev, stage_info, pipeline->layout,
-         gfx_pipeline ? &gfx_pipeline->state.blend.pstate : NULL,
-         dyn_state_is_set(gfx_pipeline, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS),
-         alloc);
-      if (!shader)
-         return VK_ERROR_OUT_OF_HOST_MEMORY;
-
-      shaders[stage] = shader;
-   }
-
-   return VK_SUCCESS;
-}
-
-static mali_ptr
-upload_shader(struct panvk_pipeline *pipeline,
-              const struct panvk_shader *shader)
-{
-   void *shader_data = util_dynarray_element(&shader->binary, uint8_t, 0);
-   unsigned shader_sz = util_dynarray_num_elements(&shader->binary, uint8_t);
-
-   if (!shader_sz)
-      return 0;
-
-   return pan_pool_upload_aligned(&pipeline->bin_pool.base, shader_data,
-                                  shader_sz, 128);
-}
-
-static void
-emit_non_fs_rsd(const struct pan_shader_info *shader_info, mali_ptr shader_ptr,
-                void *rsd)
-{
-   assert(shader_info->stage != MESA_SHADER_FRAGMENT);
-
-   pan_pack(rsd, RENDERER_STATE, cfg) {
-      pan_shader_prepare_rsd(shader_info, shader_ptr, &cfg);
-   }
-}
-
 static bool
 writes_depth(const struct vk_depth_stencil_state *ds)
 {
@@ -225,7 +145,7 @@ static void
 emit_base_fs_rsd(const struct panvk_graphics_pipeline *pipeline,
                 const struct vk_graphics_pipeline_state *state, void *rsd)
 {
-   const struct pan_shader_info *info = &pipeline->state.fs.info;
+   const struct pan_shader_info *info = &pipeline->fs.info;
   const struct vk_rasterization_state *rs = state->rs;
   const struct vk_depth_stencil_state *ds = state->ds;
   const struct vk_multisample_state *ms = state->ms;
@@ -238,13 +158,12 @@ emit_base_fs_rsd(const struct panvk_graphics_pipeline *pipeline,
      bool alpha_to_coverage = ms && ms->alpha_to_coverage_enable;

      if (pipeline->state.fs.required) {
-         pan_shader_prepare_rsd(info, pipeline->state.fs.address, &cfg);
+         pan_shader_prepare_rsd(info, pipeline->fs.code, &cfg);

-         uint8_t rt_written =
-            pipeline->state.fs.info.outputs_written >> FRAG_RESULT_DATA0;
+         uint8_t rt_written = info->outputs_written >> FRAG_RESULT_DATA0;
         uint8_t rt_mask = pipeline->state.fs.rt_mask;
         cfg.properties.allow_forward_pixel_to_kill =
-            pipeline->state.fs.info.fs.can_fpk && !(rt_mask & ~rt_written) &&
+            pipeline->fs.info.fs.can_fpk && !(rt_mask & ~rt_written) &&
            !alpha_to_coverage && !pipeline->state.blend.reads_dest;

         bool writes_zs = writes_z || writes_s;
@@ -405,80 +324,11 @@ emit_blend(const struct panvk_graphics_pipeline *pipeline, unsigned rt,
      cfg.internal.fixed_function.conversion.memory_format =
         GENX(panfrost_dithered_format_from_pipe_format)(rts->format, dithered);
      cfg.internal.fixed_function.conversion.register_format =
-         blend_type_from_nir(pipeline->state.fs.info.bifrost.blend[rt].type);
+         blend_type_from_nir(pipeline->fs.info.bifrost.blend[rt].type);
      cfg.internal.fixed_function.rt = rt;
   }
 }

-static void
-init_shaders(struct panvk_pipeline *pipeline,
-             const VkGraphicsPipelineCreateInfo *gfx_create_info,
-             const struct vk_graphics_pipeline_state *gfx_state,
-             struct panvk_shader **shaders)
-{
-   struct panvk_graphics_pipeline *gfx_pipeline =
-      panvk_pipeline_to_graphics_pipeline(pipeline);
-   struct panvk_compute_pipeline *compute_pipeline =
-      panvk_pipeline_to_compute_pipeline(pipeline);
-
-   for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
-      const struct panvk_shader *shader = shaders[i];
-      if (!shader)
-         continue;
-
-      pipeline->tls_size = MAX2(pipeline->tls_size, shader->info.tls_size);
-
-      if (shader->has_img_access)
-         pipeline->img_access_mask |= BITFIELD_BIT(i);
-
-      if (i == MESA_SHADER_VERTEX)
-         gfx_pipeline->state.vs.writes_point_size =
-            shader->info.vs.writes_point_size;
-
-      mali_ptr shader_ptr = i == MESA_SHADER_FRAGMENT
-                               ? gfx_pipeline->state.fs.address
-                               : upload_shader(pipeline, shader);
-
-      if (i != MESA_SHADER_FRAGMENT) {
-         struct panfrost_ptr rsd =
-            pan_pool_alloc_desc(&pipeline->desc_pool.base, RENDERER_STATE);
-
-         emit_non_fs_rsd(&shader->info, shader_ptr, rsd.cpu);
-         pipeline->rsds[i] = rsd.gpu;
-      }
-
-      if (i == MESA_SHADER_COMPUTE) {
-         compute_pipeline->local_size = shader->local_size;
-         compute_pipeline->wls_size = shader->info.wls_size;
-      }
-   }
-
-   if (gfx_create_info && !gfx_pipeline->state.fs.dynamic_rsd) {
-      unsigned bd_count = MAX2(gfx_pipeline->state.blend.pstate.rt_count, 1);
-      struct panfrost_ptr rsd = pan_pool_alloc_desc_aggregate(
-         &pipeline->desc_pool.base, PAN_DESC(RENDERER_STATE),
-         PAN_DESC_ARRAY(bd_count, BLEND));
-      void *bd = rsd.cpu + pan_size(RENDERER_STATE);
-
-      emit_base_fs_rsd(gfx_pipeline, gfx_state, rsd.cpu);
-      for (unsigned rt = 0; rt < gfx_pipeline->state.blend.pstate.rt_count;
-           rt++) {
-         emit_blend(gfx_pipeline, rt, bd);
-         bd += pan_size(BLEND);
-      }
-
-      pipeline->rsds[MESA_SHADER_FRAGMENT] = rsd.gpu;
-   } else if (gfx_create_info) {
-      emit_base_fs_rsd(gfx_pipeline, gfx_state,
-                       gfx_pipeline->state.fs.rsd_template.opaque);
-      for (unsigned rt = 0;
-           rt < MAX2(gfx_pipeline->state.blend.pstate.rt_count, 1); rt++) {
-         emit_blend(gfx_pipeline, rt,
-                    &gfx_pipeline->state.blend.bd_template[rt].opaque);
-      }
-   }
-}
-
 #define is_dyn(__state, __name)                                                \
   BITSET_TEST((__state)->dynamic, MESA_VK_DYNAMIC_##__name)

@@ -578,7 +428,7 @@ parse_color_blend(struct panvk_graphics_pipeline *pipeline,
 static bool
 fs_required(struct panvk_graphics_pipeline *pipeline)
 {
-   const struct pan_shader_info *info = &pipeline->state.fs.info;
+   const struct pan_shader_info *info = &pipeline->fs.info;

   /* If we generally have side effects */
   if (info->fs.sidefx)
@@ -598,21 +448,33 @@ fs_required(struct panvk_graphics_pipeline *pipeline)

 static void
 init_fs_state(struct panvk_graphics_pipeline *pipeline,
-              const struct vk_graphics_pipeline_state *state,
-              struct panvk_shader *shader)
+              const struct vk_graphics_pipeline_state *state)
 {
-   if (!shader)
-      return;
-
   pipeline->state.fs.dynamic_rsd = is_dyn(state, RS_DEPTH_BIAS_FACTORS) ||
                                    is_dyn(state, CB_BLEND_CONSTANTS) ||
                                    is_dyn(state, DS_STENCIL_COMPARE_MASK) ||
                                    is_dyn(state, DS_STENCIL_WRITE_MASK) ||
                                    is_dyn(state, DS_STENCIL_REFERENCE);
-   pipeline->state.fs.address = upload_shader(&pipeline->base, shader);
-   pipeline->state.fs.info = shader->info;
   pipeline->state.fs.rt_mask = get_active_color_attachments(state);
   pipeline->state.fs.required = fs_required(pipeline);
+
+   unsigned bd_count = MAX2(pipeline->state.blend.pstate.rt_count, 1);
+   struct mali_renderer_state_packed *rsd = &pipeline->state.fs.rsd_template;
+   struct mali_blend_packed *bds = pipeline->state.blend.bd_template;
+
+   if (!pipeline->state.fs.dynamic_rsd) {
+      struct panfrost_ptr ptr = pan_pool_alloc_desc_aggregate(
+         &pipeline->base.desc_pool.base, PAN_DESC(RENDERER_STATE),
+         PAN_DESC_ARRAY(bd_count, BLEND));
+
+      rsd = ptr.cpu;
+      bds = ptr.cpu + pan_size(RENDERER_STATE);
+      pipeline->fs.rsd = ptr.gpu;
+   }
+
+   emit_base_fs_rsd(pipeline, state, rsd);
+   for (unsigned i = 0; i < bd_count; i++)
+      emit_blend(pipeline, i, &bds[i]);
 }

 static void
@@ -656,24 +518,19 @@ update_varying_slot(struct panvk_varyings_info *varyings, gl_shader_stage stage,
 }

 static void
-collect_varyings(struct panvk_graphics_pipeline *pipeline,
-                 struct panvk_shader **shaders)
+collect_varyings(struct panvk_graphics_pipeline *pipeline)
 {
-   for (uint32_t s = 0; s < MESA_SHADER_STAGES; s++) {
-      if (!shaders[s])
-         continue;
+   const struct pan_shader_info *vs_info = &pipeline->vs.info;
+   const struct pan_shader_info *fs_info = &pipeline->fs.info;

-      const struct pan_shader_info *info = &shaders[s]->info;
+   for (unsigned i = 0; i < vs_info->varyings.output_count; i++) {
+      update_varying_slot(&pipeline->varyings, MESA_SHADER_VERTEX,
+                          &vs_info->varyings.output[i], false);
+   }

-      for (unsigned i = 0; i < info->varyings.input_count; i++) {
-         update_varying_slot(&pipeline->varyings, s, &info->varyings.input[i],
-                             true);
-      }
-
-      for (unsigned i = 0; i < info->varyings.output_count; i++) {
-         update_varying_slot(&pipeline->varyings, s, &info->varyings.output[i],
-                             false);
-      }
+   for (unsigned i = 0; i < fs_info->varyings.input_count; i++) {
+      update_varying_slot(&pipeline->varyings, MESA_SHADER_FRAGMENT,
+                          &fs_info->varyings.input[i], true);
   }

   /* TODO: Xfb */
@@ -693,6 +550,60 @@ collect_varyings(struct panvk_graphics_pipeline *pipeline,
   }
 }

+static VkResult
+init_pipeline_shader(struct panvk_pipeline *pipeline,
+                     const VkPipelineShaderStageCreateInfo *stage_info,
+                     const VkAllocationCallbacks *alloc,
+                     struct panvk_pipeline_shader *pshader)
+{
+   struct panvk_device *dev = to_panvk_device(pipeline->base.device);
+   struct panvk_graphics_pipeline *gfx_pipeline =
+      panvk_pipeline_to_graphics_pipeline(pipeline);
+   struct panvk_shader *shader;
+
+   shader = panvk_per_arch(shader_create)(
+      dev, stage_info, pipeline->layout,
+      gfx_pipeline ? &gfx_pipeline->state.blend.pstate : NULL,
+      dyn_state_is_set(gfx_pipeline, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS),
+      alloc);
+   if (!shader)
+      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   void *shader_data = util_dynarray_element(&shader->binary, uint8_t, 0);
+   unsigned shader_sz = util_dynarray_num_elements(&shader->binary, uint8_t);
+
+   if (shader_sz) {
+      pshader->code = pan_pool_upload_aligned(&pipeline->bin_pool.base,
+                                              shader_data, shader_sz, 128);
+   } else {
+      pshader->code = 0;
+   }
+
+   pshader->info = shader->info;
+   pshader->has_img_access = shader->has_img_access;
+
+   if (stage_info->stage == VK_SHADER_STAGE_COMPUTE_BIT) {
+      struct panvk_compute_pipeline *compute_pipeline =
+         panvk_pipeline_to_compute_pipeline(pipeline);
+
+      compute_pipeline->local_size = shader->local_size;
+   }
+
+   if (stage_info->stage != VK_SHADER_STAGE_FRAGMENT_BIT) {
+      struct panfrost_ptr rsd =
+         pan_pool_alloc_desc(&pipeline->desc_pool.base, RENDERER_STATE);
+
+      pan_pack(rsd.cpu, RENDERER_STATE, cfg) {
+         pan_shader_prepare_rsd(&pshader->info, pshader->code, &cfg);
+      }
+
+      pshader->rsd = rsd.gpu;
+   }
+
+   panvk_per_arch(shader_destroy)(dev, shader, alloc);
+   return VK_SUCCESS;
+}
+
 static VkResult
 panvk_graphics_pipeline_create(struct panvk_device *dev,
                               struct vk_pipeline_cache *cache,
@@ -730,16 +641,38 @@ panvk_graphics_pipeline_create(struct panvk_device *dev,
   panvk_pool_init(&gfx_pipeline->base.desc_pool, dev, NULL, 0, 4096,
                   "Pipeline static state", false);

-   struct panvk_shader *shaders[MESA_SHADER_STAGES] = {NULL};
-
   parse_color_blend(gfx_pipeline, &state);
-   compile_shaders(&gfx_pipeline->base, create_info->pStages,
-                   create_info->stageCount, alloc, shaders);
-   collect_varyings(gfx_pipeline, shaders);
-   init_fs_state(gfx_pipeline, &state, shaders[MESA_SHADER_FRAGMENT]);
-   init_shaders(&gfx_pipeline->base, create_info, &state, shaders);

-   release_shaders(&gfx_pipeline->base, shaders, alloc);
+   /* Make sure the stage info is correct even if no stage info is provided for
+    * this stage in pStages.
+    */
+   gfx_pipeline->vs.info.stage = MESA_SHADER_VERTEX;
+   gfx_pipeline->fs.info.stage = MESA_SHADER_FRAGMENT;
+
+   for (uint32_t i = 0; i < create_info->stageCount; i++) {
+      struct panvk_pipeline_shader *pshader = NULL;
+      switch (create_info->pStages[i].stage) {
+      case VK_SHADER_STAGE_VERTEX_BIT:
+         pshader = &gfx_pipeline->vs;
+         break;
+
+      case VK_SHADER_STAGE_FRAGMENT_BIT:
+         pshader = &gfx_pipeline->fs;
+         break;
+
+      default:
+         assert(!"Unsupported graphics pipeline stage");
+      }
+
+      VkResult result = init_pipeline_shader(
+         &gfx_pipeline->base, &create_info->pStages[i], alloc, pshader);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   collect_varyings(gfx_pipeline);
+   init_fs_state(gfx_pipeline, &state);
+
   return VK_SUCCESS;
 }

@@ -796,13 +729,12 @@ panvk_compute_pipeline_create(struct panvk_device *dev,
   panvk_pool_init(&compute_pipeline->base.desc_pool, dev, NULL, 0, 4096,
                   "Pipeline static state", false);

-   struct panvk_shader *shaders[MESA_SHADER_STAGES] = {NULL};
+   VkResult result =
+      init_pipeline_shader(&compute_pipeline->base, &create_info->stage, alloc,
+                           &compute_pipeline->cs);
+   if (result != VK_SUCCESS)
+      return result;

-   compile_shaders(&compute_pipeline->base, &create_info->stage, 1, alloc,
-                   shaders);
-   init_shaders(&compute_pipeline->base, NULL, NULL, shaders);
-
-   release_shaders(&compute_pipeline->base, shaders, alloc);
   return VK_SUCCESS;
 }