diff --git a/src/broadcom/vulkan/v3dv_cmd_buffer.c b/src/broadcom/vulkan/v3dv_cmd_buffer.c
index fb0747ff907..5c2e83dfe83 100644
--- a/src/broadcom/vulkan/v3dv_cmd_buffer.c
+++ b/src/broadcom/vulkan/v3dv_cmd_buffer.c
@@ -2730,7 +2730,7 @@ update_fs_variant(struct v3dv_cmd_buffer *cmd_buffer)
                                VK_PIPELINE_BIND_POINT_GRAPHICS);
 
    VkResult vk_result;
-   variant = v3dv_get_shader_variant(p_stage, &local_key.base,
+   variant = v3dv_get_shader_variant(p_stage, NULL, &local_key.base,
                                      sizeof(struct v3d_fs_key),
                                      &cmd_buffer->device->alloc,
                                      &vk_result);
@@ -2761,7 +2761,7 @@ update_vs_variant(struct v3dv_cmd_buffer *cmd_buffer)
    cmd_buffer_populate_v3d_key(&local_key.base, cmd_buffer,
                                VK_PIPELINE_BIND_POINT_GRAPHICS);
 
-   variant = v3dv_get_shader_variant(p_stage, &local_key.base,
+   variant = v3dv_get_shader_variant(p_stage, NULL, &local_key.base,
                                      sizeof(struct v3d_vs_key),
                                      &cmd_buffer->device->alloc,
                                      &vk_result);
@@ -2782,7 +2782,7 @@ update_vs_variant(struct v3dv_cmd_buffer *cmd_buffer)
 
    cmd_buffer_populate_v3d_key(&local_key.base, cmd_buffer,
                                VK_PIPELINE_BIND_POINT_GRAPHICS);
-   variant = v3dv_get_shader_variant(p_stage, &local_key.base,
+   variant = v3dv_get_shader_variant(p_stage, NULL, &local_key.base,
                                      sizeof(struct v3d_vs_key),
                                      &cmd_buffer->device->alloc,
                                      &vk_result);
@@ -2813,7 +2813,7 @@ update_cs_variant(struct v3dv_cmd_buffer *cmd_buffer)
                                VK_PIPELINE_BIND_POINT_COMPUTE);
 
    VkResult result;
-   variant = v3dv_get_shader_variant(p_stage, &local_key,
+   variant = v3dv_get_shader_variant(p_stage, NULL, &local_key,
                                      sizeof(struct v3d_key),
                                      &cmd_buffer->device->alloc,
                                      &result);
diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c
index 9e77656ebca..0d4dacf852e 100644
--- a/src/broadcom/vulkan/v3dv_pipeline.c
+++ b/src/broadcom/vulkan/v3dv_pipeline.c
@@ -1288,18 +1288,22 @@ pipeline_stage_create_vs_bin(const struct v3dv_pipeline_stage *src,
 }
 
 /* FIXME: right now this just asks for an bo for the exact size of the qpu
- * assembly. It would be good to be slighly smarter and having one "all
- * shaders" bo per pipeline, so each p_stage/variant would save their offset
- * on such. That is really relevant due the fact that bo are always aligned to
- * 4096, so that would allow to use less memory.
+ * assembly. It would be good to be able to re-use bos to avoid bo
+ * fragmentation. This could be tricky though, as right now we are uploading
+ * the assembly from two paths, when compiling a shader, or when deserializing
+ * from the pipeline cache. This also means that the same variant can be
+ * shared by different objects. So with the current approach it is clear who
+ * owns the assembly bo, but if shared, who owns the shared bo?
  *
  * For now one-bo per-assembly would work.
  *
  * Returns false if it was not able to allocate or map the assembly bo memory.
  */
 static bool
-upload_assembly(struct v3dv_pipeline_stage *p_stage,
+upload_assembly(struct v3dv_device *device,
                 struct v3dv_shader_variant *variant,
+                gl_shader_stage stage,
+                bool is_coord,
                 const void *data,
                 uint32_t size)
 {
@@ -1308,11 +1312,10 @@ upload_assembly(struct v3dv_pipeline_stage *p_stage,
     * have any bo
     */
    assert(variant->assembly_bo == NULL);
-   struct v3dv_device *device = p_stage->pipeline->device;
 
-   switch (p_stage->stage) {
+   switch (stage) {
    case MESA_SHADER_VERTEX:
-      name = (p_stage->is_coord == true) ? "coord_shader_assembly" :
+      name = (is_coord == true) ? "coord_shader_assembly" :
          "vertex_shader_assembly";
       break;
    case MESA_SHADER_FRAGMENT:
@@ -1340,92 +1343,30 @@ upload_assembly(struct v3dv_pipeline_stage *p_stage,
 
    memcpy(bo->map, data, size);
 
-   v3dv_bo_unmap(device, bo);
-
+   /* We don't unmap the assembly bo, as the mapping is used later to gather
+    * the assembly when serializing the variant.
+    */
    variant->assembly_bo = bo;
 
    return true;
 }
 
-/* For a given key, it returns the compiled version of the shader. If it was
- * already compiled, it gets it from the p_stage cache, if not it compiles is
- * through the v3d compiler
+/*
+ * Adds a shader variant to the pipeline shader variant cache, updates
+ * pipeline spill structures if needed.
  *
- * If the method returns NULL it means that it was not able to allocate the
- * resources for the variant. out_vk_result would return which OOM applies.
- *
- * Returns a new reference of the shader_variant to the caller.
+ * Assumes that the caller already checked that the variant is not on such
+ * cache.
  */
-struct v3dv_shader_variant*
-v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage,
-                        struct v3d_key *key,
-                        size_t key_size,
-                        const VkAllocationCallbacks *pAllocator,
-                        VkResult *out_vk_result)
+static void
+pipeline_add_variant_to_cache(struct v3dv_pipeline_stage *p_stage,
+                              struct v3d_key *key,
+                              size_t key_size,
+                              struct v3dv_shader_variant *variant)
 {
    struct hash_table *ht = p_stage->cache;
-   struct hash_entry *entry = _mesa_hash_table_search(ht, key);
-
-   if (entry) {
-      *out_vk_result = VK_SUCCESS;
-      v3dv_shader_variant_ref(entry->data);
-      return entry->data;
-   }
-
    struct v3dv_pipeline *pipeline = p_stage->pipeline;
    struct v3dv_device *device = pipeline->device;
-   struct v3dv_shader_variant *variant =
-      vk_zalloc(&device->alloc, sizeof(*variant), 8,
-                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-
-   if (variant == NULL) {
-      *out_vk_result = VK_ERROR_OUT_OF_HOST_MEMORY;
-      return NULL;
-   }
-   variant->ref_cnt = 1;
-
-   struct v3dv_physical_device *physical_device =
-      &pipeline->device->instance->physicalDevice;
-   const struct v3d_compiler *compiler = physical_device->compiler;
-
-   uint32_t variant_id = p_atomic_inc_return(&p_stage->compiled_variant_count);
-
-   if (V3D_DEBUG & (V3D_DEBUG_NIR |
-                    v3d_debug_flag_for_shader_stage(p_stage->stage))) {
-      fprintf(stderr, "Just before v3d_compile: %s prog %d variant %d NIR:\n",
-              gl_shader_stage_name(p_stage->stage),
-              p_stage->program_id,
-              variant_id);
-      nir_print_shader(p_stage->nir, stderr);
-      fprintf(stderr, "\n");
-   }
-
-   uint64_t *qpu_insts;
-   uint32_t qpu_insts_size;
-
-   qpu_insts = v3d_compile(compiler,
-                           key, &variant->prog_data.base,
-                           p_stage->nir,
-                           shader_debug_output, NULL,
-                           p_stage->program_id,
-                           variant_id,
-                           &qpu_insts_size);
-
-   if (!qpu_insts) {
-      fprintf(stderr, "Failed to compile %s prog %d NIR to VIR\n",
-              gl_shader_stage_name(p_stage->stage),
-              p_stage->program_id);
-   } else {
-      if (!upload_assembly(p_stage, variant, qpu_insts, qpu_insts_size)) {
-         free(qpu_insts);
-         v3dv_shader_variant_unref(device, variant);
-
-         *out_vk_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
-         return NULL;
-      }
-   }
-
-   free(qpu_insts);
 
    if (ht) {
       struct v3d_key *dup_key;
@@ -1450,8 +1391,184 @@ v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage,
          v3dv_bo_alloc(device, total_spill_size, "spill", true);
       pipeline->spill.size_per_thread = variant->prog_data.base->spill_size;
    }
+}
+
+
+/* Computes the sha1 that identifies a variant: shader sha1(s) plus the key. */
+static void
+pipeline_hash_variant(const struct v3dv_pipeline_stage *p_stage,
+                      struct v3d_key *key,
+                      size_t key_size,
+                      unsigned char *sha1_out)
+{
+   struct mesa_sha1 sha1_ctx;
+   _mesa_sha1_init(&sha1_ctx);
+
+   if (p_stage->stage != MESA_SHADER_COMPUTE) {
+      /* Graphics stages hash both the vertex and the fragment shader sha1,
+       * as one could affect the other during linking (like if vertex
+       * outputs are constants, then the fragment shader would load_const
+       * instead of load_input). An alternative would be to use the
+       * serialized nir, but that seems like an overkill.
+       */
+      const struct v3dv_pipeline_stage *vs = p_stage->pipeline->vs;
+      const struct v3dv_pipeline_stage *fs = p_stage->pipeline->fs;
+      _mesa_sha1_update(&sha1_ctx, vs->shader_sha1, sizeof(vs->shader_sha1));
+      _mesa_sha1_update(&sha1_ctx, fs->shader_sha1, sizeof(fs->shader_sha1));
+   } else {
+      _mesa_sha1_update(&sha1_ctx, p_stage->shader_sha1,
+                        sizeof(p_stage->shader_sha1));
+   }
+   _mesa_sha1_update(&sha1_ctx, key, key_size);
+   _mesa_sha1_final(&sha1_ctx, sha1_out);
+}
+
+/*
+ * Creates a new shader variant, taking ownership of prog_data: it is stored
+ * on the variant on success and freed here on failure.
+ *
+ * If qpu_insts is not NULL it also allocates and fills the assembly bo.
+ */
+struct v3dv_shader_variant *
+v3dv_shader_variant_create(struct v3dv_device *device,
+                           gl_shader_stage stage,
+                           bool is_coord,
+                           const unsigned char *variant_sha1,
+                           struct v3d_prog_data *prog_data,
+                           uint32_t prog_data_size,
+                           const uint64_t *qpu_insts,
+                           uint32_t qpu_insts_size,
+                           VkResult *out_vk_result)
+{
+   struct v3dv_shader_variant *variant =
+      vk_zalloc(&device->alloc, sizeof(*variant), 8,
+                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+
+   if (variant == NULL) {
+      ralloc_free(prog_data);
+      *out_vk_result = VK_ERROR_OUT_OF_HOST_MEMORY;
+      return NULL;
+   }
+
+   variant->ref_cnt = 1;
+   variant->stage = stage;
+   variant->is_coord = is_coord;
+   memcpy(variant->variant_sha1, variant_sha1, sizeof(variant->variant_sha1));
+   variant->prog_data_size = prog_data_size;
+   variant->prog_data.base = prog_data;
+
+   if (qpu_insts) {
+      if (!upload_assembly(device, variant, stage, is_coord,
+                           qpu_insts, qpu_insts_size)) {
+         ralloc_free(variant->prog_data.base);
+         vk_free(&device->alloc, variant);
+         *out_vk_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
+         return NULL;
+      }
+      variant->qpu_insts_size = qpu_insts_size;
+   }
 
    *out_vk_result = VK_SUCCESS;
+
+   return variant;
+}
+
+/* For a given key, it returns the compiled version of the shader. It looks
+ * for it first on the p_stage cache, then on the pipeline cache (if any),
+ * and if it is not found it compiles it through the v3d compiler.
+ *
+ * If the method returns NULL it means that it was not able to allocate the
+ * resources for the variant. out_vk_result would return which OOM applies.
+ *
+ * Returns a new reference of the shader_variant to the caller.
+ */
+struct v3dv_shader_variant*
+v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage,
+                        struct v3dv_pipeline_cache *cache,
+                        struct v3d_key *key,
+                        size_t key_size,
+                        const VkAllocationCallbacks *pAllocator,
+                        VkResult *out_vk_result)
+{
+   /* We first try to get the variant from the internal p_stage variant
+    * cache
+    */
+   struct hash_table *ht = p_stage->cache;
+   struct hash_entry *entry = _mesa_hash_table_search(ht, key);
+
+   if (entry) {
+      *out_vk_result = VK_SUCCESS;
+      v3dv_shader_variant_ref(entry->data);
+      return entry->data;
+   }
+
+   /* Now we search on the pipeline cache, if one is available */
+   struct v3dv_pipeline *pipeline = p_stage->pipeline;
+   unsigned char variant_sha1[20];
+   pipeline_hash_variant(p_stage, key, key_size, variant_sha1);
+
+   struct v3dv_shader_variant *variant =
+      v3dv_pipeline_cache_search_for_variant(pipeline,
+                                             cache,
+                                             variant_sha1);
+
+   if (variant) {
+      pipeline_add_variant_to_cache(p_stage, key, key_size, variant);
+      *out_vk_result = VK_SUCCESS;
+      return variant;
+   }
+
+   /* If we don't find the variant in any cache, we compile one and add the
+    * variant to both caches
+    */
+   struct v3dv_device *device = pipeline->device;
+   struct v3dv_physical_device *physical_device =
+      &pipeline->device->instance->physicalDevice;
+   const struct v3d_compiler *compiler = physical_device->compiler;
+
+   uint32_t variant_id = p_atomic_inc_return(&p_stage->compiled_variant_count);
+
+   if (V3D_DEBUG & (V3D_DEBUG_NIR |
+                    v3d_debug_flag_for_shader_stage(p_stage->stage))) {
+      fprintf(stderr, "Just before v3d_compile: %s prog %d variant %d NIR:\n",
+              gl_shader_stage_name(p_stage->stage),
+              p_stage->program_id,
+              variant_id);
+      nir_print_shader(p_stage->nir, stderr);
+      fprintf(stderr, "\n");
+   }
+
+   uint64_t *qpu_insts;
+   uint32_t qpu_insts_size;
+   struct v3d_prog_data *prog_data;
+
+   qpu_insts = v3d_compile(compiler,
+                           key, &prog_data,
+                           p_stage->nir,
+                           shader_debug_output, NULL,
+                           p_stage->program_id,
+                           variant_id,
+                           &qpu_insts_size);
+
+   if (!qpu_insts) {
+      fprintf(stderr, "Failed to compile %s prog %d NIR to VIR\n",
+              gl_shader_stage_name(p_stage->stage),
+              p_stage->program_id);
+   }
+
+   variant = v3dv_shader_variant_create(device, p_stage->stage, p_stage->is_coord,
+                                        variant_sha1,
+                                        prog_data, v3d_prog_data_size(p_stage->stage),
+                                        qpu_insts, qpu_insts_size,
+                                        out_vk_result);
+   if (qpu_insts)
+      free(qpu_insts);
+
+   if (*out_vk_result == VK_SUCCESS) {
+      pipeline_add_variant_to_cache(p_stage, key, key_size, variant);
+      v3dv_pipeline_cache_upload_variant(pipeline, cache, variant);
+   }
+
    return variant;
 }
 
@@ -1731,6 +1848,12 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
       next_stage = stages[stage];
    }
 
+   /* Assign the stages to the pipeline. We need to do this before we start
+    * compiling, because the variant sha1 is computed using all the stages.
+    */
+   pipeline->vs = stages[MESA_SHADER_VERTEX];
+   pipeline->fs = stages[MESA_SHADER_FRAGMENT];
+
    /* Compiling to vir. Note that at this point we are compiling a default
     * variant. Binding to textures, and other stuff (that would need a
     * cmd_buffer) would need a recompile
@@ -1757,7 +1880,6 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
           */
          lower_vs_io(p_stage->nir);
 
-         pipeline->vs = p_stage;
          pipeline->vs_bin = pipeline_stage_create_vs_bin(pipeline->vs, pAllocator);
 
          /* FIXME: likely this to be moved to a gather info method to a full
@@ -1776,7 +1898,7 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
          pipeline_populate_v3d_vs_key(key, pCreateInfo, pipeline->vs);
          VkResult vk_result;
          pipeline->vs->current_variant =
-            v3dv_get_shader_variant(pipeline->vs, &key->base, sizeof(*key),
+            v3dv_get_shader_variant(pipeline->vs, cache, &key->base, sizeof(*key),
                                     pAllocator, &vk_result);
          if (vk_result != VK_SUCCESS)
             return vk_result;
@@ -1784,7 +1906,7 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
          key = &pipeline->vs_bin->key.vs;
          pipeline_populate_v3d_vs_key(key, pCreateInfo, pipeline->vs_bin);
          pipeline->vs_bin->current_variant =
-            v3dv_get_shader_variant(pipeline->vs_bin, &key->base, sizeof(*key),
+            v3dv_get_shader_variant(pipeline->vs_bin, cache, &key->base, sizeof(*key),
                                     pAllocator, &vk_result);
          if (vk_result != VK_SUCCESS)
             return vk_result;
@@ -1794,8 +1916,6 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
       case MESA_SHADER_FRAGMENT: {
          struct v3d_fs_key *key = &p_stage->key.fs;
 
-         pipeline->fs = p_stage;
-
          pipeline_populate_v3d_fs_key(key, pCreateInfo, p_stage,
                                       get_ucp_enable_mask(stages));
 
@@ -1803,7 +1923,7 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
 
          VkResult vk_result;
          p_stage->current_variant =
-            v3dv_get_shader_variant(p_stage, &key->base, sizeof(*key),
+            v3dv_get_shader_variant(p_stage, cache, &key->base, sizeof(*key),
                                     pAllocator, &vk_result);
          if (vk_result != VK_SUCCESS)
             return vk_result;
@@ -2821,7 +2941,7 @@ pipeline_compile_compute(struct v3dv_pipeline *pipeline,
 
     VkResult result;
     p_stage->current_variant =
-      v3dv_get_shader_variant(p_stage, key, sizeof(*key), alloc, &result);
+       v3dv_get_shader_variant(p_stage, cache, key, sizeof(*key), alloc, &result);
    return result;
 }
 
diff --git a/src/broadcom/vulkan/v3dv_pipeline_cache.c b/src/broadcom/vulkan/v3dv_pipeline_cache.c
index d0874d1326a..7d290a54ed8 100644
--- a/src/broadcom/vulkan/v3dv_pipeline_cache.c
+++ b/src/broadcom/vulkan/v3dv_pipeline_cache.c
@@ -56,6 +56,10 @@ cache_dump_stats(struct v3dv_pipeline_cache *cache)
    fprintf(stderr, "  NIR cache entries:      %d\n", cache->nir_stats.count);
    fprintf(stderr, "  NIR cache miss count:   %d\n", cache->nir_stats.miss);
    fprintf(stderr, "  NIR cache hit  count:   %d\n", cache->nir_stats.hit);
+
+   fprintf(stderr, "  variant cache entries:      %d\n", cache->variant_stats.count);
+   fprintf(stderr, "  variant cache miss count:   %d\n", cache->variant_stats.miss);
+   fprintf(stderr, "  variant cache hit  count:   %d\n", cache->variant_stats.hit);
 }
 
 void
@@ -186,12 +190,154 @@ pipeline_cache_init(struct v3dv_pipeline_cache *cache,
       cache->nir_stats.miss = 0;
       cache->nir_stats.hit = 0;
       cache->nir_stats.count = 0;
+
+      cache->variant_cache = _mesa_hash_table_create(NULL, sha1_hash_func,
+                                                     sha1_compare_func);
+      cache->variant_stats.miss = 0;
+      cache->variant_stats.hit = 0;
+      cache->variant_stats.count = 0;
    } else {
       cache->nir_cache = NULL;
+      cache->variant_cache = NULL;
    }
 
 }
 
+/* Looks for the variant with the given sha1 on the pipeline cache. On a hit
+ * it returns a new reference to the variant; it returns NULL on a miss, or
+ * if there is no variant cache to search on.
+ */
+struct v3dv_shader_variant*
+v3dv_pipeline_cache_search_for_variant(struct v3dv_pipeline *pipeline,
+                                       struct v3dv_pipeline_cache *cache,
+                                       unsigned char sha1_key[20])
+{
+   if (!cache || !cache->variant_cache)
+      return NULL;
+
+   if (unlikely(dump_stats)) {
+      char sha1buf[41];
+      _mesa_sha1_format(sha1buf, sha1_key);
+      fprintf(stderr, "pipeline cache %p, search variant with key %s\n", cache, sha1buf);
+   }
+
+   pthread_mutex_lock(&cache->mutex);
+   struct hash_entry *entry =
+      _mesa_hash_table_search(cache->variant_cache, sha1_key);
+
+   if (entry) {
+      struct v3dv_shader_variant *variant = entry->data;
+      if (unlikely(dump_stats)) {
+         fprintf(stderr, "\tcache hit: %p\n", variant);
+         cache->variant_stats.hit++;
+         cache_dump_stats(cache);
+      }
+
+      if (variant)
+         v3dv_shader_variant_ref(variant);
+
+      pthread_mutex_unlock(&cache->mutex);
+      return variant;
+   }
+
+   if (unlikely(dump_stats)) {
+      fprintf(stderr, "\tcache miss\n");
+      cache->variant_stats.miss++;
+      cache_dump_stats(cache);
+   }
+
+   pthread_mutex_unlock(&cache->mutex);
+   return NULL;
+}
+
+/* Adds the variant to the pipeline cache, taking a new reference to it, unless
+ * the cache already has an entry with the same sha1.
+ */
+void
+v3dv_pipeline_cache_upload_variant(struct v3dv_pipeline *pipeline,
+                                   struct v3dv_pipeline_cache *cache,
+                                   struct v3dv_shader_variant  *variant)
+{
+   if (cache == NULL || cache->variant_cache == NULL)
+      return;
+
+   struct hash_table *variants = cache->variant_cache;
+
+   pthread_mutex_lock(&cache->mutex);
+
+   if (_mesa_hash_table_search(variants, variant->variant_sha1) == NULL) {
+      v3dv_shader_variant_ref(variant);
+      _mesa_hash_table_insert(variants, variant->variant_sha1, variant);
+
+      if (unlikely(dump_stats)) {
+         char sha1buf[41];
+         _mesa_sha1_format(sha1buf, variant->variant_sha1);
+         fprintf(stderr, "pipeline cache %p, new variant entry with key %s\n\t%p\n",
+                 cache, sha1buf, variant);
+         cache->variant_stats.count++;
+         cache_dump_stats(cache);
+      }
+   }
+
+   pthread_mutex_unlock(&cache->mutex);
+}
+
+/* Creates a shader variant from the data read from the blob, that is expected
+ * to follow the layout used by shader_variant_write_to_blob. Returns NULL if
+ * the blob is truncated or inconsistent, or if an allocation fails.
+ */
+static struct v3dv_shader_variant*
+shader_variant_create_from_blob(struct v3dv_device *device,
+                                struct blob_reader *blob)
+{
+   VkResult result;
+
+   gl_shader_stage stage = blob_read_uint32(blob);
+   bool is_coord = blob_read_uint8(blob);
+   const unsigned char *variant_sha1 = blob_read_bytes(blob, 20);
+
+   /* FIXME: as we include the stage perhaps we can avoid prog_data_size?
+    * Checked only if there was no overrun, as those reads return zero.
+    */
+   uint32_t prog_data_size = blob_read_uint32(blob);
+   const void *prog_data = blob_read_bytes(blob, prog_data_size);
+   if (blob->overrun || prog_data_size != v3d_prog_data_size(stage))
+      return NULL;
+
+   uint32_t ulist_count = blob_read_uint32(blob);
+   uint32_t contents_size = sizeof(enum quniform_contents) * ulist_count;
+   const void *contents_data = blob_read_bytes(blob, contents_size);
+   uint32_t ulist_data_size = sizeof(uint32_t) * ulist_count;
+   const void *ulist_data_data = blob_read_bytes(blob, ulist_data_size);
+   uint32_t qpu_insts_size = blob_read_uint32(blob);
+   const uint64_t *qpu_insts = blob_read_bytes(blob, qpu_insts_size);
+   if (blob->overrun)
+      return NULL;
+
+   /* v3dv_shader_variant_create takes ownership of the prog_data it gets,
+    * as that is what the v3d compiler returns. So we allocate a new one
+    * (including the uniform list) and fill it with the blob data.
+    */
+   struct v3d_prog_data *new_prog_data = rzalloc_size(NULL, prog_data_size);
+   if (new_prog_data == NULL)
+      return NULL;
+   memcpy(new_prog_data, prog_data, prog_data_size);
+   struct v3d_uniform_list *ulist = &new_prog_data->uniforms;
+   ulist->count = ulist_count;
+   ulist->contents = ralloc_array(new_prog_data, enum quniform_contents, ulist->count);
+   ulist->data = ralloc_array(new_prog_data, uint32_t, ulist->count);
+   if (ulist->contents == NULL || ulist->data == NULL) {
+      ralloc_free(new_prog_data);
+      return NULL;
+   }
+   memcpy(ulist->contents, contents_data, contents_size);
+   memcpy(ulist->data, ulist_data_data, ulist_data_size);
+
+   return v3dv_shader_variant_create(device, stage, is_coord,
+                                     variant_sha1, new_prog_data, prog_data_size,
+                                     qpu_insts, qpu_insts_size, &result);
+}
+
 static void
 pipeline_cache_load(struct v3dv_pipeline_cache *cache,
                     size_t size,
@@ -201,6 +347,21 @@ pipeline_cache_load(struct v3dv_pipeline_cache *cache,
    struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;
    struct vk_pipeline_cache_header header;
 
+   if (cache->variant_cache == NULL)
+      return;
+
+   struct blob_reader blob;
+   blob_reader_init(&blob, data, size);
+
+   blob_copy_bytes(&blob, &header, sizeof(header));
+   uint32_t count = blob_read_uint32(&blob);
+   if (blob.overrun)
+      return;
+
+   if (unlikely(dump_stats)) {
+      fprintf(stderr, "pipeline cache %p, loading %i variant entries\n", cache, count);
+   }
+
    if (size < sizeof(header))
       return;
    memcpy(&header, data, sizeof(header));
@@ -215,9 +376,16 @@ pipeline_cache_load(struct v3dv_pipeline_cache *cache,
    if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0)
       return;
 
-   /* FIXME: at this point we only verify the header but we dont really load
-    * any data. pending to implement serialize/deserialize among other things.
-    */
+   for (uint32_t i = 0; i < count; i++) {
+      struct v3dv_shader_variant *variant =
+         shader_variant_create_from_blob(device, &blob);
+      if (!variant)
+         break;
+      _mesa_hash_table_insert(cache->variant_cache, variant->variant_sha1, variant);
+      if (unlikely(dump_stats))
+         cache->variant_stats.count++;
+   }
+
 }
 
 VkResult
@@ -271,6 +439,15 @@ v3dv_DestroyPipelineCache(VkDevice _device,
          ralloc_free(entry->data);
 
       _mesa_hash_table_destroy(cache->nir_cache, NULL);
+
+      hash_table_foreach(cache->variant_cache, entry) {
+         struct v3dv_shader_variant *variant = entry->data;
+         if (variant)
+            v3dv_shader_variant_unref(device, variant);
+      }
+
+      _mesa_hash_table_destroy(cache->variant_cache, NULL);
+
    }
 
    vk_free2(&device->alloc, pAllocator, cache);
@@ -288,6 +465,30 @@ v3dv_MergePipelineCaches(VkDevice device,
    return VK_SUCCESS;
 }
 
+/* Serializes the variant on the blob: stage, is_coord, sha1, prog_data,
+ * uniform list and assembly. Returns false if the blob ran out of memory.
+ */
+static bool
+shader_variant_write_to_blob(const struct v3dv_shader_variant *variant,
+                             struct blob *blob)
+{
+   const struct v3d_prog_data *prog_data = variant->prog_data.base;
+   const struct v3d_uniform_list *uniforms = &prog_data->uniforms;
+
+   blob_write_uint32(blob, variant->stage);
+   blob_write_uint8(blob, variant->is_coord);
+   blob_write_bytes(blob, variant->variant_sha1, sizeof(variant->variant_sha1));
+   blob_write_uint32(blob, variant->prog_data_size);
+   blob_write_bytes(blob, prog_data, variant->prog_data_size);
+   blob_write_uint32(blob, uniforms->count);
+   blob_write_bytes(blob, uniforms->contents, sizeof(enum quniform_contents) * uniforms->count);
+   blob_write_bytes(blob, uniforms->data, sizeof(uint32_t) * uniforms->count);
+   blob_write_uint32(blob, variant->qpu_insts_size);
+   assert(variant->assembly_bo->map);
+   blob_write_bytes(blob, variant->assembly_bo->map, variant->qpu_insts_size);
+   return !blob->out_of_memory;
+}
+
 VkResult
 v3dv_GetPipelineCacheData(VkDevice _device,
                           VkPipelineCache _cache,
@@ -296,32 +497,68 @@ v3dv_GetPipelineCacheData(VkDevice _device,
 {
    V3DV_FROM_HANDLE(v3dv_device, device, _device);
    V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
+   /* With a NULL pData the blob only tracks the size that would be needed */
+   struct blob blob;
+   if (pData) {
+      blob_init_fixed(&blob, pData, *pDataSize);
+   } else {
+      blob_init_fixed(&blob, NULL, SIZE_MAX);
+   }
+
    struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;
-   struct vk_pipeline_cache_header *header;
    VkResult result = VK_SUCCESS;
 
    pthread_mutex_lock(&cache->mutex);
 
-   /* FIXME: at this point the cache data is just the header */
-   const size_t size = sizeof(*header);
-   if (pData == NULL) {
-      pthread_mutex_unlock(&cache->mutex);
-      *pDataSize = size;
-      return VK_SUCCESS;
-   }
-   if (*pDataSize < sizeof(*header)) {
-      pthread_mutex_unlock(&cache->mutex);
+   struct vk_pipeline_cache_header header = {
+      .header_size = sizeof(struct vk_pipeline_cache_header),
+      .header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE,
+      .vendor_id = v3dv_physical_device_vendor_id(pdevice),
+      .device_id = v3dv_physical_device_device_id(pdevice),
+   };
+   memcpy(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
+   blob_write_bytes(&blob, &header, sizeof(header));
+
+   uint32_t count = 0;
+   intptr_t count_offset = blob_reserve_uint32(&blob);
+   if (count_offset < 0) {
       *pDataSize = 0;
+      blob_finish(&blob);
+      pthread_mutex_unlock(&cache->mutex);
       return VK_INCOMPLETE;
    }
 
-   header = pData;
-   header->header_size = sizeof(*header);
-   header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
-   header->vendor_id = v3dv_physical_device_vendor_id(pdevice);
-   header->device_id = v3dv_physical_device_device_id(pdevice);
-   memcpy(header->uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
+   if (cache->variant_cache) {
+      hash_table_foreach(cache->variant_cache, entry) {
+         struct v3dv_shader_variant *variant = entry->data;
+
+         size_t save_size = blob.size;
+         if (!shader_variant_write_to_blob(variant, &blob)) {
+            /* On failure roll back to the previous size and bail; the mutex
+             * is released only once, on the common exit path below. */
+            blob.size = save_size;
+            result = VK_INCOMPLETE;
+            break;
+         }
+
+         count++;
+      }
+   }
+
+   blob_overwrite_uint32(&blob, count_offset, count);
+
+   *pDataSize = blob.size;
+
+   blob_finish(&blob);
+
+   if (unlikely(dump_stats)) {
+      assert(count <= cache->variant_stats.count);
+      fprintf(stderr, "GetPipelineCacheData: serializing cache %p, "
+              "%i variant entries, %u DataSize\n",
+              cache, count, (uint32_t) *pDataSize);
+   }
 
    pthread_mutex_unlock(&cache->mutex);
+
    return result;
 }
diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h
index a58063ed4ad..115840ace50 100644
--- a/src/broadcom/vulkan/v3dv_private.h
+++ b/src/broadcom/vulkan/v3dv_private.h
@@ -278,6 +278,9 @@ struct v3dv_pipeline_cache {
 
    struct hash_table *nir_cache;
    struct v3dv_pipeline_cache_stats nir_stats;
+
+   struct hash_table *variant_cache;
+   struct v3dv_pipeline_cache_stats variant_stats;
 };
 
 struct v3dv_device {
@@ -1221,6 +1224,14 @@ vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage)
 struct v3dv_shader_variant {
    uint32_t ref_cnt;
 
+   gl_shader_stage stage;
+   bool is_coord;
+
+   /* Key for the pipeline cache: a sha1 computed from the shader sha1 of
+    * the pipeline stage(s) and the v3d key used to compile the variant.
+    */
+   unsigned char variant_sha1[20];
+
    union {
       struct v3d_prog_data *base;
       struct v3d_vs_prog_data *vs;
@@ -1228,11 +1239,16 @@ struct v3dv_shader_variant {
       struct v3d_compute_prog_data *cs;
    } prog_data;
 
+   /* We explicitly save the prog_data_size as it makes serialization
+    * easier.
+    */
+   uint32_t prog_data_size;
    /* FIXME: using one bo per shader. Eventually we would be interested on
     * reusing the same bo for all the shaders, like a bo per v3dv_pipeline for
     * shaders.
     */
    struct v3dv_bo *assembly_bo;
+   uint32_t qpu_insts_size;
 };
 
 /*
@@ -1278,11 +1294,13 @@ struct v3dv_pipeline_stage {
       struct v3d_fs_key fs;
    } key;
 
-   /* Cache with all the shader variant.
+   /* Cache with all the shader variants built for this pipeline. This one is
+    * required over the pipeline cache because we still allow to create shader
+    * variants after Pipeline creation.
     */
    struct hash_table *cache;
 
-   struct v3dv_shader_variant *current_variant;
+   struct v3dv_shader_variant*current_variant;
 
    /* FIXME: only make sense on vs, so perhaps a v3dv key like radv? or a kind
     * of pipe_draw_info
@@ -1712,11 +1730,23 @@ struct v3dv_cl_reloc v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_
 
 struct v3dv_shader_variant *
 v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage,
+                        struct v3dv_pipeline_cache *cache,
                         struct v3d_key *key,
                         size_t key_size,
                         const VkAllocationCallbacks *pAllocator,
                         VkResult *out_vk_result);
 
+struct v3dv_shader_variant *
+v3dv_shader_variant_create(struct v3dv_device *device,
+                           gl_shader_stage stage,
+                           bool is_coord,
+                           const unsigned char *variant_sha1,
+                           struct v3d_prog_data *prog_data,
+                           uint32_t prog_data_size,
+                           const uint64_t *qpu_insts,
+                           uint32_t qpu_insts_size,
+                           VkResult *out_vk_result);
+
 void
 v3dv_shader_variant_destroy(struct v3dv_device *device,
                             struct v3dv_shader_variant *variant);
@@ -1786,6 +1816,16 @@ nir_shader* v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
                                                const nir_shader_compiler_options *nir_options,
                                                unsigned char sha1_key[20]);
 
+struct v3dv_shader_variant*
+v3dv_pipeline_cache_search_for_variant(struct v3dv_pipeline *pipeline,
+                                       struct v3dv_pipeline_cache *cache,
+                                       unsigned char sha1_key[20]);
+
+void
+v3dv_pipeline_cache_upload_variant(struct v3dv_pipeline *pipeline,
+                                   struct v3dv_pipeline_cache *cache,
+                                   struct v3dv_shader_variant  *variant);
+
 
 #define V3DV_DEFINE_HANDLE_CASTS(__v3dv_type, __VkType)   \
                                                         \