diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c
index da104f64d7b..ee9eee9d000 100644
--- a/src/broadcom/vulkan/v3dv_cmd_buffer.c
+++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c
@@ -1727,7 +1727,7 @@ cmd_buffer_update_ez_state(struct v3dv_cmd_buffer *cmd_buffer,
    }
 
    /* If the FS writes Z, then it may update against the chosen EZ direction */
-   if (pipeline->fs->prog_data.fs->writes_z)
+   if (pipeline->fs->current_variant->prog_data.fs->writes_z)
       job->ez_state = VC5_EZ_DISABLED;
 
    if (job->first_ez_state == VC5_EZ_UNDECIDED &&
@@ -2186,14 +2186,14 @@ emit_varyings_state(struct v3dv_cmd_buffer *cmd_buffer)
    struct v3dv_job *job = cmd_buffer->state.job;
    struct v3dv_pipeline *pipeline = cmd_buffer->state.pipeline;
 
+   struct v3d_fs_prog_data *prog_data_fs =
+      pipeline->fs->current_variant->prog_data.fs;
+
    const uint32_t num_flags =
-      ARRAY_SIZE(pipeline->fs->prog_data.fs->flat_shade_flags);
-   const uint32_t *flat_shade_flags =
-      pipeline->fs->prog_data.fs->flat_shade_flags;
-   const uint32_t *noperspective_flags =
-      pipeline->fs->prog_data.fs->noperspective_flags;
-   const uint32_t *centroid_flags =
-      pipeline->fs->prog_data.fs->centroid_flags;
+      ARRAY_SIZE(prog_data_fs->flat_shade_flags);
+   const uint32_t *flat_shade_flags = prog_data_fs->flat_shade_flags;
+   const uint32_t *noperspective_flags =  prog_data_fs->noperspective_flags;
+   const uint32_t *centroid_flags = prog_data_fs->centroid_flags;
 
    if (!emit_varying_flags(job, num_flags, flat_shade_flags,
                            emit_flat_shade_flags)) {
@@ -2243,9 +2243,9 @@ emit_gl_shader_state(struct v3dv_cmd_buffer *cmd_buffer)
       v3dv_write_uniforms(cmd_buffer, pipeline->vs_bin);
 
    /* Update the cache dirty flag based on the shader progs data */
-   job->tmu_dirty_rcl |= pipeline->vs_bin->prog_data.vs->base.tmu_dirty_rcl;
-   job->tmu_dirty_rcl |= pipeline->vs->prog_data.vs->base.tmu_dirty_rcl;
-   job->tmu_dirty_rcl |= pipeline->fs->prog_data.fs->base.tmu_dirty_rcl;
+   job->tmu_dirty_rcl |= pipeline->vs_bin->current_variant->prog_data.vs->base.tmu_dirty_rcl;
+   job->tmu_dirty_rcl |= pipeline->vs->current_variant->prog_data.vs->base.tmu_dirty_rcl;
+   job->tmu_dirty_rcl |= pipeline->fs->current_variant->prog_data.fs->base.tmu_dirty_rcl;
 
    /* See GFXH-930 workaround below */
    uint32_t num_elements_to_emit = MAX2(pipeline->va_count, 1);
@@ -2271,11 +2271,11 @@ emit_gl_shader_state(struct v3dv_cmd_buffer *cmd_buffer)
          pipeline->vpm_cfg.As;
 
       shader.coordinate_shader_code_address =
-         v3dv_cl_address(pipeline->vs_bin->assembly_bo, 0);
+         v3dv_cl_address(pipeline->vs_bin->current_variant->assembly_bo, 0);
       shader.vertex_shader_code_address =
-         v3dv_cl_address(pipeline->vs->assembly_bo, 0);
+         v3dv_cl_address(pipeline->vs->current_variant->assembly_bo, 0);
       shader.fragment_shader_code_address =
-         v3dv_cl_address(pipeline->fs->assembly_bo, 0);
+         v3dv_cl_address(pipeline->fs->current_variant->assembly_bo, 0);
 
       shader.coordinate_shader_uniforms_address = vs_bin_uniforms;
       shader.vertex_shader_uniforms_address = vs_uniforms;
@@ -2296,6 +2296,12 @@ emit_gl_shader_state(struct v3dv_cmd_buffer *cmd_buffer)
 
       struct v3dv_vertex_binding *c_vb = &cmd_buffer->state.vertex_bindings[binding];
 
+      struct v3d_vs_prog_data *prog_data_vs =
+         pipeline->vs->current_variant->prog_data.vs;
+
+      struct v3d_vs_prog_data *prog_data_vs_bin =
+         pipeline->vs_bin->current_variant->prog_data.vs;
+
       cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD,
                              &pipeline->vertex_attrs[i * packet_length], attr) {
 
@@ -2306,9 +2312,9 @@ emit_gl_shader_state(struct v3dv_cmd_buffer *cmd_buffer)
                                         c_vb->offset);
 
          attr.number_of_values_read_by_coordinate_shader =
-            pipeline->vs_bin->prog_data.vs->vattr_sizes[location];
+            prog_data_vs_bin->vattr_sizes[location];
          attr.number_of_values_read_by_vertex_shader =
-            pipeline->vs->prog_data.vs->vattr_sizes[location];
+            prog_data_vs->vattr_sizes[location];
 
          /* GFXH-930: At least one attribute must be enabled and read by CS
           * and VS.  If we have attributes being consumed by the VS but not
@@ -2316,7 +2322,7 @@ emit_gl_shader_state(struct v3dv_cmd_buffer *cmd_buffer)
           * CS's VPM inputs.  (Since CS is just dead-code-elimination compared
           * to VS, we can't have CS loading but not VS).
           */
-         if (pipeline->vs_bin->prog_data.vs->vattr_sizes[location])
+         if (prog_data_vs_bin->vattr_sizes[location])
             cs_loaded_any = true;
 
          if (binding == pipeline->va_count - 1 && !cs_loaded_any) {
diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c
index 4227e1e2b73..270b5a9ad80 100644
--- a/src/broadcom/vulkan/v3dv_pipeline.c
+++ b/src/broadcom/vulkan/v3dv_pipeline.c
@@ -32,6 +32,8 @@
 
 #include "compiler/nir/nir_builder.h"
 
+#include "util/u_atomic.h"
+
 #include "vulkan/util/vk_format.h"
 
 #include "broadcom/cle/v3dx_pack.h"
@@ -83,7 +85,16 @@ destroy_pipeline_stage(struct v3dv_device *device,
                        struct v3dv_pipeline_stage *p_stage,
                        const VkAllocationCallbacks *pAllocator)
 {
-   v3dv_bo_free(device, p_stage->assembly_bo);
+   /* Release every cached variant: its assembly bo and the variant itself. */
+   hash_table_foreach(p_stage->cache, entry) {
+      struct v3dv_shader_variant *variant = entry->data;
+
+      if (variant->assembly_bo)
+         v3dv_bo_free(device, variant->assembly_bo);
+      vk_free(&device->alloc, variant);
+   }
+
+   _mesa_hash_table_destroy(p_stage->cache, NULL);
 
    vk_free2(&device->alloc, pAllocator, p_stage);
 }
@@ -689,6 +700,26 @@ pipeline_populate_v3d_key(struct v3d_key *key,
                           const VkGraphicsPipelineCreateInfo *pCreateInfo,
                           const struct v3dv_pipeline_stage *p_stage)
 {
+   /* The following values are defaults used at pipeline creation time, when
+    * we lack the info about the real sampler/texture format used, which is
+    * needed to decide about lowerings and other things affecting the final
+    * assembly. Once all that info is in place, we need to check whether a
+    * new shader variant is required (if we are lucky the default values
+    * will match and no new compilation will be done).
+    */
+   nir_shader *s = p_stage->nir;
+
+   key->num_tex_used = s->info.num_textures;
+   for (uint32_t i = 0; i < s->info.num_textures; i++) {
+      key->tex[i].swizzle[0] = PIPE_SWIZZLE_X;
+      key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y;
+      key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z;
+      key->tex[i].swizzle[3] = PIPE_SWIZZLE_W;
+
+      key->tex[i].return_size = 16;
+      key->tex[i].return_channels = 2;
+   }
+
    /* default value. Would be override on the vs/gs populate methods when GS
     * gets supported
     */
@@ -888,14 +919,55 @@ pipeline_populate_v3d_vs_key(struct v3d_vs_key *key,
       key->num_used_outputs = 0;
    } else {
       struct v3dv_pipeline *pipeline = p_stage->pipeline;
-      key->num_used_outputs = pipeline->fs->prog_data.fs->num_inputs;
+      struct v3dv_shader_variant *fs_variant = pipeline->fs->current_variant;
+
+      key->num_used_outputs = fs_variant->prog_data.fs->num_inputs;
+
       STATIC_ASSERT(sizeof(key->used_outputs) ==
-                    sizeof(pipeline->fs->prog_data.fs->input_slots));
-      memcpy(key->used_outputs, pipeline->fs->prog_data.fs->input_slots,
+                    sizeof(fs_variant->prog_data.fs->input_slots));
+      memcpy(key->used_outputs, fs_variant->prog_data.fs->input_slots,
              sizeof(key->used_outputs));
    }
 }
 
+/* FIXME: following hash/compare methods are C&P from v3d. Common place? */
+static uint32_t
+fs_cache_hash(const void *key)
+{
+   return _mesa_hash_data(key, sizeof(struct v3d_fs_key));
+}
+
+static uint32_t
+vs_cache_hash(const void *key)
+{
+   return _mesa_hash_data(key, sizeof(struct v3d_vs_key));
+}
+
+static bool
+fs_cache_compare(const void *key1, const void *key2)
+{
+   return memcmp(key1, key2, sizeof(struct v3d_fs_key)) == 0;
+}
+
+static bool
+vs_cache_compare(const void *key1, const void *key2)
+{
+   return memcmp(key1, key2, sizeof(struct v3d_vs_key)) == 0;
+}
+
+static struct hash_table*
+create_variant_cache(gl_shader_stage stage)
+{
+   switch (stage) {
+   case MESA_SHADER_VERTEX:
+      return _mesa_hash_table_create(NULL, vs_cache_hash, vs_cache_compare);
+   case MESA_SHADER_FRAGMENT:
+      return _mesa_hash_table_create(NULL, fs_cache_hash, fs_cache_compare);
+   default:
+      unreachable("not supported shader stage");
+   }
+}
+
 /*
  * Creates the pipeline_stage for the coordinate shader. Initially a clone of
  * the vs pipeline_stage, with is_coord to true;
@@ -917,6 +989,11 @@ pipeline_stage_create_vs_bin(const struct v3dv_pipeline_stage *src,
    p_stage->module = src->module;
    p_stage->nir = src->nir;
 
+   /* Technically we could share the hash_table, but each stage having its
+    * own makes destroying the p_stage more straightforward
+    */
+   p_stage->cache = create_variant_cache(MESA_SHADER_VERTEX);
+
    p_stage->is_coord = true;
 
    return p_stage;
@@ -924,14 +1001,15 @@ pipeline_stage_create_vs_bin(const struct v3dv_pipeline_stage *src,
 
 /* FIXME: right now this just asks for an bo for the exact size of the qpu
  * assembly. It would be good to be slighly smarter and having one "all
- * shaders" bo per pipeline, so each p_stage would save their offset on
- * such. That is really relevant due the fact that bo are always aligned to
+ * shaders" bo per pipeline, so each p_stage/variant would save its offset
+ * in it. That is relevant due to the fact that bos are always aligned to
  * 4096, so that would allow to use less memory.
  *
  * For now one-bo per-assembly would work.
  */
 static void
 upload_assembly(struct v3dv_pipeline_stage *p_stage,
+                struct v3dv_shader_variant *variant,
                 const void *data,
                 uint32_t size)
 {
@@ -939,7 +1017,7 @@ upload_assembly(struct v3dv_pipeline_stage *p_stage,
    /* We are uploading the assembly just once, so at this point we shouldn't
     * have any bo
     */
-   assert(p_stage->assembly_bo == NULL);
+   assert(variant->assembly_bo == NULL);
    struct v3dv_device *device = p_stage->pipeline->device;
 
    switch (p_stage->stage) {
@@ -971,32 +1049,41 @@ upload_assembly(struct v3dv_pipeline_stage *p_stage,
 
    v3dv_bo_unmap(device, bo);
 
-   p_stage->assembly_bo = bo;
+   variant->assembly_bo = bo;
 }
 
-static void
-compile_pipeline_stage(struct v3dv_pipeline_stage *p_stage)
+/* For a given key, it returns the compiled version of the shader. If it was
+ * already compiled, it gets it from the p_stage cache; if not, it compiles it
+ * through the v3d compiler
+ */
+static struct v3dv_shader_variant*
+get_shader_variant(struct v3dv_pipeline_stage *p_stage,
+                   struct v3d_key *key,
+                   size_t key_size)
 {
+   struct hash_table *ht = p_stage->cache;
+   struct hash_entry *entry = _mesa_hash_table_search(ht, key);
+
+   if (entry)
+      return entry->data;
+
+   struct v3dv_device *device = p_stage->pipeline->device;
+   struct v3dv_shader_variant *variant =
+      vk_zalloc(&device->alloc, sizeof(*variant), 8,
+                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+
    struct v3dv_physical_device *physical_device =
       &p_stage->pipeline->device->instance->physicalDevice;
    const struct v3d_compiler *compiler = physical_device->compiler;
 
-   /* We don't support variants (and probably will never support them) */
-   int variant_id = 0;
-
-   /* Note that we are assigning program_id slightly differently that
-    * v3d. Here we are assigning one per pipeline stage, so vs and vs_bin
-    * would have a different program_id, while v3d would have the same for
-    * both. For the case of v3dv, it is more natural to have an id this way,
-    * as right now we are using it for debugging, not for shader-db.
-    */
-   p_stage->program_id = physical_device->next_program_id++;
+   uint32_t variant_id = p_atomic_inc_return(&p_stage->compiled_variant_count);
 
    if (V3D_DEBUG & (V3D_DEBUG_NIR |
                     v3d_debug_flag_for_shader_stage(p_stage->stage))) {
-      fprintf(stderr, "Just before v3d_compile: %s prog %d NIR:\n",
+      fprintf(stderr, "Just before v3d_compile: %s prog %d variant %d NIR:\n",
               gl_shader_stage_name(p_stage->stage),
-              p_stage->program_id);
+              p_stage->program_id,
+              variant_id);
       nir_print_shader(p_stage->nir, stderr);
       fprintf(stderr, "\n");
    }
@@ -1005,7 +1092,7 @@ compile_pipeline_stage(struct v3dv_pipeline_stage *p_stage)
    uint32_t qpu_insts_size;
 
    qpu_insts = v3d_compile(compiler,
-                           &p_stage->key.base, &p_stage->prog_data.base,
+                           key, &variant->prog_data.base,
                            p_stage->nir,
                            shader_debug_output, NULL,
                            p_stage->program_id,
@@ -1017,10 +1104,22 @@ compile_pipeline_stage(struct v3dv_pipeline_stage *p_stage)
               gl_shader_stage_name(p_stage->stage),
               p_stage->program_id);
    } else {
-      upload_assembly(p_stage, qpu_insts, qpu_insts_size);
+      upload_assembly(p_stage, variant, qpu_insts, qpu_insts_size);
    }
 
    free(qpu_insts);
+
+   if (ht) {
+      struct v3d_key *dup_key;
+      dup_key = ralloc_size(ht, key_size);
+      memcpy(dup_key, key, key_size);
+      _mesa_hash_table_insert(ht, dup_key, variant);
+   }
+
+   /* FIXME: pending provide scratch space for register spilling */
+   assert(variant->prog_data.base->spill_size == 0);
+
+   return variant;
 }
 
 /* FIXME: C&P from st, common place? */
@@ -1132,6 +1231,8 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
 {
    struct v3dv_pipeline_stage *stages[MESA_SHADER_STAGES] = { };
    struct v3dv_device *device = pipeline->device;
+   struct v3dv_physical_device *physical_device =
+      &device->instance->physicalDevice;
 
    /* First pass to get the the common info from the shader and the nir
     * shader. We don't care of the coord shader for now.
@@ -1144,6 +1245,16 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
          vk_zalloc2(&device->alloc, pAllocator, sizeof(*p_stage), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
 
+      /* Note that we are assigning program_id slightly differently than
+       * v3d. Here we are assigning one per pipeline stage, so vs and vs_bin
+       * would have a different program_id, while v3d would have the same for
+       * both. For the case of v3dv, it is more natural to have an id this way,
+       * as right now we are using it for debugging, not for shader-db.
+       */
+      p_stage->program_id = physical_device->next_program_id++;
+      p_stage->compiled_variant_count = 0;
+      p_stage->cache = create_variant_cache(stage);
+
       p_stage->pipeline = pipeline;
       p_stage->stage = stage;
       if (stage == MESA_SHADER_VERTEX)
@@ -1178,6 +1289,10 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
       p_stage->module = 0;
       p_stage->nir = b.shader;
 
+      p_stage->program_id = physical_device->next_program_id++;
+      p_stage->compiled_variant_count = 0;
+      p_stage->cache = create_variant_cache(MESA_SHADER_FRAGMENT);
+
       stages[MESA_SHADER_FRAGMENT] = p_stage;
       pipeline->active_stages |= MESA_SHADER_FRAGMENT;
    }
@@ -1204,7 +1319,10 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
       next_stage = stages[stage];
    }
 
-   /* Compiling to vir */
+   /* Compiling to vir. Note that at this point we are compiling a default
+    * variant. Binding to textures, and other stuff (that would need a
+    * cmd_buffer) would need a recompile
+    */
    for (int stage = MESA_SHADER_STAGES - 1; stage >= 0; stage--) {
       if (stages[stage] == NULL || stages[stage]->entrypoint == NULL)
          continue;
@@ -1214,7 +1332,7 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
       pipeline_lower_nir(pipeline, p_stage, pipeline->layout);
 
       switch(stage) {
-      case MESA_SHADER_VERTEX:
+      case MESA_SHADER_VERTEX: {
          /* Right now we only support pipelines with both vertex and fragment
           * shader.
           */
@@ -1234,25 +1352,35 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
          lower_vs_io(p_stage->nir);
 
          /* Note that at this point we would compile twice, one for vs and
-          * other for vs_bin. For now we are maintaining two pipeline_stage
-          * and two keys. Eventually we could reuse the key.
+          * other for vs_bin. For now we are maintaining two pipeline_stages.
+          *
+          * FIXME: this leads to two caches, when it shouldn't, revisit
           */
-         pipeline_populate_v3d_vs_key(&pipeline->vs->key.vs, pCreateInfo, pipeline->vs);
-         pipeline_populate_v3d_vs_key(&pipeline->vs_bin->key.vs, pCreateInfo, pipeline->vs_bin);
+         struct v3d_vs_key *key = &pipeline->vs->key.vs;
+         pipeline_populate_v3d_vs_key(key, pCreateInfo, pipeline->vs);
+         pipeline->vs->current_variant =
+            get_shader_variant(pipeline->vs, &key->base, sizeof(*key));
 
-         compile_pipeline_stage(pipeline->vs);
-         compile_pipeline_stage(pipeline->vs_bin);
+         key = &pipeline->vs_bin->key.vs;
+         pipeline_populate_v3d_vs_key(key, pCreateInfo, pipeline->vs_bin);
+         pipeline->vs_bin->current_variant =
+            get_shader_variant(pipeline->vs_bin, &key->base, sizeof(*key));
          break;
-      case MESA_SHADER_FRAGMENT:
+      }
+      case MESA_SHADER_FRAGMENT: {
+         struct v3d_fs_key *key = &p_stage->key.fs;
+
          pipeline->fs = p_stage;
 
-         pipeline_populate_v3d_fs_key(&p_stage->key.fs, pCreateInfo,
-                             p_stage);
+         pipeline_populate_v3d_fs_key(key, pCreateInfo, p_stage);
 
          lower_fs_io(p_stage->nir);
 
-         compile_pipeline_stage(pipeline->fs);
+         p_stage->current_variant =
+            get_shader_variant(p_stage, &key->base, sizeof(*key));
+
          break;
+      }
       default:
          unreachable("not supported shader stage");
       }
@@ -1263,11 +1391,13 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
     */
    pipeline->vpm_cfg_bin.As = 1;
    pipeline->vpm_cfg_bin.Ve = 0;
-   pipeline->vpm_cfg_bin.Vc = pipeline->vs_bin->prog_data.vs->vcm_cache_size;
+   pipeline->vpm_cfg_bin.Vc =
+      pipeline->vs_bin->current_variant->prog_data.vs->vcm_cache_size;
 
    pipeline->vpm_cfg.As = 1;
    pipeline->vpm_cfg.Ve = 0;
-   pipeline->vpm_cfg.Vc = pipeline->vs->prog_data.vs->vcm_cache_size;
+   pipeline->vpm_cfg.Vc =
+      pipeline->vs->current_variant->prog_data.vs->vcm_cache_size;
 
    return VK_SUCCESS;
 }
@@ -1720,6 +1850,16 @@ pack_shader_state_record(struct v3dv_pipeline *pipeline)
    assert(sizeof(pipeline->shader_state_record) ==
           cl_packet_length(GL_SHADER_STATE_RECORD));
 
+   struct v3d_fs_prog_data *prog_data_fs =
+      pipeline->fs->current_variant->prog_data.fs;
+
+   struct v3d_vs_prog_data *prog_data_vs =
+      pipeline->vs->current_variant->prog_data.vs;
+
+   struct v3d_vs_prog_data *prog_data_vs_bin =
+      pipeline->vs_bin->current_variant->prog_data.vs;
+
+
    /* Note: we are not packing addresses, as we need the job (see
     * cl_pack_emit_reloc). Additionally uniforms can't be filled up at this
     * point as they depend on dynamic info that can be set after create the
@@ -1730,33 +1870,31 @@ pack_shader_state_record(struct v3dv_pipeline *pipeline)
       shader.enable_clipping = true;
 
       shader.point_size_in_shaded_vertex_data =
-         pipeline->vs->key.vs.per_vertex_point_size;
+         pipeline->vs->topology == PIPE_PRIM_POINTS;
 
       /* Must be set if the shader modifies Z, discards, or modifies
        * the sample mask.  For any of these cases, the fragment
        * shader needs to write the Z value (even just discards).
        */
-      shader.fragment_shader_does_z_writes =
-         pipeline->fs->prog_data.fs->writes_z;
+      shader.fragment_shader_does_z_writes = prog_data_fs->writes_z;
       /* Set if the EZ test must be disabled (due to shader side
        * effects and the early_z flag not being present in the
        * shader).
        */
-      shader.turn_off_early_z_test =
-         pipeline->fs->prog_data.fs->disable_ez;
+      shader.turn_off_early_z_test = prog_data_fs->disable_ez;
 
       shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
-         pipeline->fs->prog_data.fs->uses_center_w;
+         prog_data_fs->uses_center_w;
 
       shader.any_shader_reads_hardware_written_primitive_id = false;
 
       shader.do_scoreboard_wait_on_first_thread_switch =
-         pipeline->fs->prog_data.fs->lock_scoreboard_on_first_thrsw;
+         prog_data_fs->lock_scoreboard_on_first_thrsw;
       shader.disable_implicit_point_line_varyings =
-         !pipeline->fs->prog_data.fs->uses_implicit_point_line_varyings;
+         !prog_data_fs->uses_implicit_point_line_varyings;
 
       shader.number_of_varyings_in_fragment_shader =
-         pipeline->fs->prog_data.fs->num_inputs;
+         prog_data_fs->num_inputs;
 
       shader.coordinate_shader_propagate_nans = true;
       shader.vertex_shader_propagate_nans = true;
@@ -1771,21 +1909,21 @@ pack_shader_state_record(struct v3dv_pipeline *pipeline)
        * on v3d, see v3dx_draw).
        */
       shader.coordinate_shader_has_separate_input_and_output_vpm_blocks =
-         pipeline->vs_bin->prog_data.vs->separate_segments;
+         prog_data_vs_bin->separate_segments;
       shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
-         pipeline->vs->prog_data.vs->separate_segments;
+         prog_data_vs->separate_segments;
 
       shader.coordinate_shader_input_vpm_segment_size =
-         pipeline->vs_bin->prog_data.vs->separate_segments ?
-         pipeline->vs_bin->prog_data.vs->vpm_input_size : 1;
+         prog_data_vs_bin->separate_segments ?
+         prog_data_vs_bin->vpm_input_size : 1;
       shader.vertex_shader_input_vpm_segment_size =
-         pipeline->vs->prog_data.vs->separate_segments ?
-         pipeline->vs->prog_data.vs->vpm_input_size : 1;
+         prog_data_vs->separate_segments ?
+         prog_data_vs->vpm_input_size : 1;
 
       shader.coordinate_shader_output_vpm_segment_size =
-         pipeline->vs_bin->prog_data.vs->vpm_output_size;
+         prog_data_vs_bin->vpm_output_size;
       shader.vertex_shader_output_vpm_segment_size =
-         pipeline->vs->prog_data.vs->vpm_output_size;
+         prog_data_vs->vpm_output_size;
 
       /* Note: see previous note about adresses */
       /* shader.coordinate_shader_uniforms_address */
@@ -1803,27 +1941,27 @@ pack_shader_state_record(struct v3dv_pipeline *pipeline)
          pipeline->vpm_cfg.Ve;
 
       shader.coordinate_shader_4_way_threadable =
-         pipeline->vs_bin->prog_data.vs->base.threads == 4;
+         prog_data_vs_bin->base.threads == 4;
       shader.vertex_shader_4_way_threadable =
-         pipeline->vs->prog_data.vs->base.threads == 4;
+         prog_data_vs->base.threads == 4;
       shader.fragment_shader_4_way_threadable =
-         pipeline->fs->prog_data.fs->base.threads == 4;
+         prog_data_fs->base.threads == 4;
 
       shader.coordinate_shader_start_in_final_thread_section =
-         pipeline->vs_bin->prog_data.vs->base.single_seg;
+         prog_data_vs_bin->base.single_seg;
       shader.vertex_shader_start_in_final_thread_section =
-         pipeline->vs->prog_data.vs->base.single_seg;
+         prog_data_vs->base.single_seg;
       shader.fragment_shader_start_in_final_thread_section =
-         pipeline->fs->prog_data.fs->base.single_seg;
+         prog_data_fs->base.single_seg;
 
       shader.vertex_id_read_by_coordinate_shader =
-         pipeline->vs_bin->prog_data.vs->uses_vid;
+         prog_data_vs_bin->uses_vid;
       shader.instance_id_read_by_coordinate_shader =
-         pipeline->vs_bin->prog_data.vs->uses_iid;
+         prog_data_vs_bin->uses_iid;
       shader.vertex_id_read_by_vertex_shader =
-         pipeline->vs->prog_data.vs->uses_vid;
+         prog_data_vs->uses_vid;
       shader.instance_id_read_by_vertex_shader =
-         pipeline->vs->prog_data.vs->uses_iid;
+         prog_data_vs->uses_iid;
 
       /* Note: see previous note about adresses */
       /* shader.address_of_default_attribute_values */
diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h
index 61254d52e93..61778d7fc7f 100644
--- a/src/broadcom/vulkan/v3dv_private.h
+++ b/src/broadcom/vulkan/v3dv_private.h
@@ -779,6 +779,20 @@ vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage)
    return ffs(vk_stage) - 1;
 }
 
+struct v3dv_shader_variant {
+   union {
+      struct v3d_prog_data *base;
+      struct v3d_vs_prog_data *vs;
+      struct v3d_fs_prog_data *fs;
+   } prog_data;
+
+   /* FIXME: using one bo per shader. Eventually we would be interested on
+    * reusing the same bo for all the shaders, like a bo per v3dv_pipeline for
+    * shaders.
+    */
+   struct v3dv_bo *assembly_bo;
+};
+
 /*
  * Per-stage info for each stage, useful so shader_module_compile_to_nir and
  * other methods doesn't have so many parameters.
@@ -805,29 +819,30 @@ struct v3dv_pipeline_stage {
 
    /** A name for this program, so you can track it in shader-db output. */
    uint32_t program_id;
+   /** How many variants of this program were compiled, for shader-db. */
+   uint32_t compiled_variant_count;
 
+   /* The following is the default v3d_key, populated using
+    * VkGraphicsPipelineCreateInfo. Variants will be created by tweaking
+    * it, so we don't need to keep a copy of that create info struct
+    * around
+    */
    union {
       struct v3d_key base;
       struct v3d_vs_key vs;
       struct v3d_fs_key fs;
    } key;
 
-   union {
-      struct v3d_prog_data *base;
-      struct v3d_vs_prog_data *vs;
-      struct v3d_fs_prog_data *fs;
-   } prog_data;
+   /* Cache with all the shader variants.
+    */
+   struct hash_table *cache;
+
+   struct v3dv_shader_variant *current_variant;
 
    /* FIXME: only make sense on vs, so perhaps a v3dv key like radv? or a kind
     * of pipe_draw_info
     */
    enum pipe_prim_type topology;
-
-   /* FIXME: using one bo per shader. Eventually we would be interested on
-    * reusing the same bo for all the shaders, like a bo per v3dv_pipeline for
-    * shaders.
-    */
-   struct v3dv_bo *assembly_bo;
 };
 
 /* FIXME: although the full vpm_config is not required at this point, as we
diff --git a/src/broadcom/vulkan/v3dv_uniforms.c b/src/broadcom/vulkan/v3dv_uniforms.c
index b4bfe421619..7b4dad84e9f 100644
--- a/src/broadcom/vulkan/v3dv_uniforms.c
+++ b/src/broadcom/vulkan/v3dv_uniforms.c
@@ -247,7 +247,8 @@ struct v3dv_cl_reloc
 v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                     struct v3dv_pipeline_stage *p_stage)
 {
-   struct v3d_uniform_list *uinfo = &p_stage->prog_data.base->uniforms;
+   struct v3d_uniform_list *uinfo =
+      &p_stage->current_variant->prog_data.base->uniforms;
    struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
    struct v3dv_pipeline *pipeline = p_stage->pipeline;