Files
mesa/src/vulkan/runtime/vk_pipeline.c
Sagar Ghuge d8447fd392 vulkan/runtime: Account for pipeline libraries stage count
Don't excludes stages coming from pipeline libraries. This caused valid
group indices referring to library stages to be dropped, leading to
mismatched stage_count.

Fixes: e05a9b77b6 ("vulkan/runtime: split rt shaders hashing from compile")
Signed-off-by: Sagar Ghuge <sagar.ghuge@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38669>
2025-11-26 22:17:57 +00:00

4484 lines
165 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/*
* Copyright © 2022 Collabora, LTD
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "vk_pipeline.h"
#include "vk_alloc.h"
#include "vk_common_entrypoints.h"
#include "vk_command_buffer.h"
#include "vk_descriptor_set_layout.h"
#include "vk_device.h"
#include "vk_graphics_state.h"
#include "vk_log.h"
#include "vk_nir.h"
#include "vk_physical_device.h"
#include "vk_physical_device_features.h"
#include "vk_pipeline_layout.h"
#include "vk_shader.h"
#include "vk_shader_module.h"
#include "vk_util.h"
#include "nir_serialize.h"
#include "nir.h"
#include "shader_enums.h"
#include "util/mesa-sha1.h"
struct vk_pipeline_binary {
struct vk_object_base base;
blake3_hash key;
size_t size;
/* The first byte is boolean of whether the binary is precomp or not,
* following by the serialized data.
*/
uint8_t data[];
};
VK_DEFINE_NONDISP_HANDLE_CASTS(vk_pipeline_binary, base, VkPipelineBinaryKHR,
VK_OBJECT_TYPE_PIPELINE_BINARY_KHR);
bool
vk_pipeline_shader_stage_is_null(const VkPipelineShaderStageCreateInfo *info)
{
if (info->module != VK_NULL_HANDLE)
return false;
vk_foreach_struct_const(ext, info->pNext) {
if (ext->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO ||
ext->sType == VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_MODULE_IDENTIFIER_CREATE_INFO_EXT)
return false;
}
return true;
}
bool
vk_pipeline_shader_stage_has_identifier(const VkPipelineShaderStageCreateInfo *info)
{
const VkPipelineShaderStageModuleIdentifierCreateInfoEXT *id_info =
vk_find_struct_const(info->pNext, PIPELINE_SHADER_STAGE_MODULE_IDENTIFIER_CREATE_INFO_EXT);
return id_info && id_info->identifierSize != 0;
}
static nir_shader *
get_builtin_nir(const VkPipelineShaderStageCreateInfo *info)
{
VK_FROM_HANDLE(vk_shader_module, module, info->module);
nir_shader *nir = NULL;
if (module != NULL) {
nir = module->nir;
} else {
const VkPipelineShaderStageNirCreateInfoMESA *nir_info =
vk_find_struct_const(info->pNext, PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA);
if (nir_info != NULL)
nir = nir_info->nir;
}
if (nir == NULL)
return NULL;
assert(nir->info.stage == vk_to_mesa_shader_stage(info->stage));
ASSERTED nir_function_impl *entrypoint = nir_shader_get_entrypoint(nir);
assert(strcmp(entrypoint->function->name, info->pName) == 0);
assert(info->pSpecializationInfo == NULL);
return nir;
}
static uint32_t
get_required_subgroup_size(const void *info_pNext)
{
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *rss_info =
vk_find_struct_const(info_pNext,
PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO);
return rss_info != NULL ? rss_info->requiredSubgroupSize : 0;
}
void
vk_set_subgroup_size(struct vk_device *device,
nir_shader *shader,
uint32_t spirv_version,
const void *info_pNext,
bool allow_varying,
bool require_full)
{
struct vk_properties *properties = &device->physical->properties;
uint32_t req_subgroup_size = get_required_subgroup_size(info_pNext);
if (req_subgroup_size) {
assert(util_is_power_of_two_nonzero(req_subgroup_size));
assert(req_subgroup_size >= 1 && req_subgroup_size <= 128);
shader->info.api_subgroup_size = req_subgroup_size;
shader->info.max_subgroup_size = req_subgroup_size;
shader->info.min_subgroup_size = req_subgroup_size;
} else if (allow_varying || spirv_version >= 0x10600) {
/* Starting with SPIR-V 1.6, varying subgroup size is the default */
} else if (properties->subgroupSize) {
shader->info.api_subgroup_size = properties->subgroupSize;
shader->info.max_subgroup_size = properties->subgroupSize;
if (require_full) {
assert(shader->info.stage == MESA_SHADER_COMPUTE ||
shader->info.stage == MESA_SHADER_MESH ||
shader->info.stage == MESA_SHADER_TASK);
shader->info.min_subgroup_size = properties->subgroupSize;
}
}
if (properties->maxSubgroupSize) {
assert(properties->minSubgroupSize);
shader->info.max_subgroup_size =
MIN2(shader->info.max_subgroup_size, properties->maxSubgroupSize);
shader->info.min_subgroup_size =
MAX2(shader->info.min_subgroup_size, properties->minSubgroupSize);
}
assert(shader->info.max_subgroup_size >= shader->info.min_subgroup_size);
}
VkResult
vk_pipeline_shader_stage_to_nir(struct vk_device *device,
VkPipelineCreateFlags2KHR pipeline_flags,
const VkPipelineShaderStageCreateInfo *info,
const struct spirv_to_nir_options *spirv_options,
const struct nir_shader_compiler_options *nir_options,
void *mem_ctx, nir_shader **nir_out)
{
VK_FROM_HANDLE(vk_shader_module, module, info->module);
const mesa_shader_stage stage = vk_to_mesa_shader_stage(info->stage);
assert(info->sType == VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO);
nir_shader *builtin_nir = get_builtin_nir(info);
if (builtin_nir != NULL) {
nir_validate_shader(builtin_nir, "internal shader");
nir_shader *clone = nir_shader_clone(mem_ctx, builtin_nir);
if (clone == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
assert(clone->options == NULL || clone->options == nir_options);
clone->options = nir_options;
*nir_out = clone;
return VK_SUCCESS;
}
const uint32_t *spirv_data;
uint32_t spirv_size;
if (module != NULL) {
spirv_data = (uint32_t *)module->data;
spirv_size = module->size;
} else {
const VkShaderModuleCreateInfo *minfo =
vk_find_struct_const(info->pNext, SHADER_MODULE_CREATE_INFO);
if (unlikely(minfo == NULL)) {
return vk_errorf(device, VK_ERROR_UNKNOWN,
"No shader module provided");
}
spirv_data = minfo->pCode;
spirv_size = minfo->codeSize;
}
nir_shader *nir = vk_spirv_to_nir(device, spirv_data, spirv_size, stage,
info->pName,
info->pSpecializationInfo,
spirv_options, nir_options,
false /* internal */,
mem_ctx);
if (nir == NULL)
return vk_errorf(device, VK_ERROR_UNKNOWN, "spirv_to_nir failed");
vk_set_subgroup_size(
device, nir,
vk_spirv_version(spirv_data, spirv_size),
info->pNext,
info->flags & VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT,
info->flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT);
if (pipeline_flags & VK_PIPELINE_CREATE_2_VIEW_INDEX_FROM_DEVICE_INDEX_BIT_KHR)
NIR_PASS(_, nir, nir_lower_view_index_to_device_index);
*nir_out = nir;
return VK_SUCCESS;
}
static void
vk_pipeline_hash_shader_stage_blake3(VkPipelineCreateFlags2KHR pipeline_flags,
const VkPipelineShaderStageCreateInfo *info,
const struct vk_pipeline_robustness_state *rstate,
blake3_hash stage_blake3)
{
VK_FROM_HANDLE(vk_shader_module, module, info->module);
const nir_shader *builtin_nir = get_builtin_nir(info);
if (builtin_nir != NULL) {
/* Internal NIR module: serialize and hash the NIR shader.
* We don't need to hash other info fields since they should match the
* NIR data.
*/
struct blob blob;
blob_init(&blob);
nir_serialize(&blob, builtin_nir, false);
assert(!blob.out_of_memory);
_mesa_blake3_compute(blob.data, blob.size, stage_blake3);
blob_finish(&blob);
return;
}
const VkShaderModuleCreateInfo *minfo =
vk_find_struct_const(info->pNext, SHADER_MODULE_CREATE_INFO);
const VkPipelineShaderStageModuleIdentifierCreateInfoEXT *iinfo =
vk_find_struct_const(info->pNext, PIPELINE_SHADER_STAGE_MODULE_IDENTIFIER_CREATE_INFO_EXT);
struct mesa_blake3 ctx;
_mesa_blake3_init(&ctx);
/* We only care about one of the pipeline flags */
pipeline_flags &= VK_PIPELINE_CREATE_2_VIEW_INDEX_FROM_DEVICE_INDEX_BIT_KHR;
_mesa_blake3_update(&ctx, &pipeline_flags, sizeof(pipeline_flags));
_mesa_blake3_update(&ctx, &info->flags, sizeof(info->flags));
assert(util_bitcount(info->stage) == 1);
_mesa_blake3_update(&ctx, &info->stage, sizeof(info->stage));
if (module) {
_mesa_blake3_update(&ctx, module->hash, sizeof(module->hash));
} else if (minfo) {
_mesa_blake3_update(&ctx, minfo->pCode, minfo->codeSize);
} else {
/* It is legal to pass in arbitrary identifiers as long as they don't exceed
* the limit. Shaders with bogus identifiers are more or less guaranteed to fail. */
assert(iinfo);
assert(iinfo->identifierSize <= VK_MAX_SHADER_MODULE_IDENTIFIER_SIZE_EXT);
_mesa_blake3_update(&ctx, iinfo->pIdentifier, iinfo->identifierSize);
}
if (rstate)
_mesa_blake3_update(&ctx, rstate, sizeof(*rstate));
_mesa_blake3_update(&ctx, info->pName, strlen(info->pName));
if (info->pSpecializationInfo) {
_mesa_blake3_update(&ctx, info->pSpecializationInfo->pMapEntries,
info->pSpecializationInfo->mapEntryCount *
sizeof(*info->pSpecializationInfo->pMapEntries));
_mesa_blake3_update(&ctx, info->pSpecializationInfo->pData,
info->pSpecializationInfo->dataSize);
}
uint32_t req_subgroup_size = get_required_subgroup_size(info);
_mesa_blake3_update(&ctx, &req_subgroup_size, sizeof(req_subgroup_size));
_mesa_blake3_final(&ctx, stage_blake3);
}
void
vk_pipeline_hash_shader_stage(VkPipelineCreateFlags2KHR pipeline_flags,
const VkPipelineShaderStageCreateInfo *info,
const struct vk_pipeline_robustness_state *rstate,
unsigned char *stage_sha1)
{
blake3_hash blake_hash;
vk_pipeline_hash_shader_stage_blake3(pipeline_flags, info, rstate, blake_hash);
_mesa_sha1_compute(blake_hash, sizeof(blake_hash), stage_sha1);
}
static VkPipelineRobustnessBufferBehaviorEXT
vk_device_default_robust_buffer_behavior(const struct vk_device *device)
{
if (device->enabled_features.robustBufferAccess2) {
return VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT;
} else if (device->enabled_features.robustBufferAccess) {
return VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;
} else {
return VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT;
}
}
static VkPipelineRobustnessImageBehaviorEXT
vk_device_default_robust_image_behavior(const struct vk_device *device)
{
if (device->enabled_features.robustImageAccess2) {
return VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT;
} else if (device->enabled_features.robustImageAccess) {
return VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_EXT;
} else {
return VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DISABLED_EXT;
}
}
void
vk_pipeline_robustness_state_fill(const struct vk_device *device,
struct vk_pipeline_robustness_state *rs,
const void *pipeline_pNext,
const void *shader_stage_pNext)
{
*rs = (struct vk_pipeline_robustness_state) {
.uniform_buffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT,
.storage_buffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT,
.vertex_inputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT,
.images = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DEVICE_DEFAULT_EXT,
.null_uniform_buffer_descriptor = device->enabled_features.nullDescriptor,
.null_storage_buffer_descriptor = device->enabled_features.nullDescriptor,
};
const VkPipelineRobustnessCreateInfoEXT *shader_info =
vk_find_struct_const(shader_stage_pNext,
PIPELINE_ROBUSTNESS_CREATE_INFO_EXT);
if (shader_info) {
rs->storage_buffers = shader_info->storageBuffers;
rs->uniform_buffers = shader_info->uniformBuffers;
rs->vertex_inputs = shader_info->vertexInputs;
rs->images = shader_info->images;
} else {
const VkPipelineRobustnessCreateInfoEXT *pipeline_info =
vk_find_struct_const(pipeline_pNext,
PIPELINE_ROBUSTNESS_CREATE_INFO_EXT);
if (pipeline_info) {
rs->storage_buffers = pipeline_info->storageBuffers;
rs->uniform_buffers = pipeline_info->uniformBuffers;
rs->vertex_inputs = pipeline_info->vertexInputs;
rs->images = pipeline_info->images;
}
}
if (rs->storage_buffers ==
VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT)
rs->storage_buffers = vk_device_default_robust_buffer_behavior(device);
if (rs->uniform_buffers ==
VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT)
rs->uniform_buffers = vk_device_default_robust_buffer_behavior(device);
if (rs->vertex_inputs ==
VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT)
rs->vertex_inputs = vk_device_default_robust_buffer_behavior(device);
if (rs->images == VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DEVICE_DEFAULT_EXT)
rs->images = vk_device_default_robust_image_behavior(device);
}
static void
vk_pipeline_init(struct vk_pipeline *pipeline,
const struct vk_pipeline_ops *ops,
VkPipelineBindPoint bind_point,
VkPipelineCreateFlags2KHR flags)
{
pipeline->ops = ops;
pipeline->bind_point = bind_point;
pipeline->flags = flags;
}
void *
vk_pipeline_zalloc(struct vk_device *device,
const struct vk_pipeline_ops *ops,
VkPipelineBindPoint bind_point,
VkPipelineCreateFlags2KHR flags,
const VkAllocationCallbacks *alloc,
size_t size)
{
struct vk_pipeline *pipeline =
vk_object_zalloc(device, alloc, size, VK_OBJECT_TYPE_PIPELINE);
if (pipeline == NULL)
return NULL;
vk_pipeline_init(pipeline, ops, bind_point, flags);
return pipeline;
}
void *vk_pipeline_multizalloc(struct vk_device *device,
struct vk_multialloc *ma,
const struct vk_pipeline_ops *ops,
VkPipelineBindPoint bind_point,
VkPipelineCreateFlags2KHR flags,
const VkAllocationCallbacks *alloc)
{
struct vk_pipeline *pipeline =
vk_object_multizalloc(device, ma, alloc, VK_OBJECT_TYPE_PIPELINE);
if (!pipeline)
return NULL;
vk_pipeline_init(pipeline, ops, bind_point, flags);
return pipeline;
}
void
vk_pipeline_free(struct vk_device *device,
const VkAllocationCallbacks *alloc,
struct vk_pipeline *pipeline)
{
vk_object_free(device, alloc, &pipeline->base);
}
VKAPI_ATTR void VKAPI_CALL
vk_common_DestroyPipeline(VkDevice _device,
VkPipeline _pipeline,
const VkAllocationCallbacks *pAllocator)
{
VK_FROM_HANDLE(vk_device, device, _device);
VK_FROM_HANDLE(vk_pipeline, pipeline, _pipeline);
if (pipeline == NULL)
return;
pipeline->ops->destroy(device, pipeline, pAllocator);
}
VKAPI_ATTR VkResult VKAPI_CALL
vk_common_GetPipelineExecutablePropertiesKHR(
VkDevice _device,
const VkPipelineInfoKHR *pPipelineInfo,
uint32_t *pExecutableCount,
VkPipelineExecutablePropertiesKHR *pProperties)
{
VK_FROM_HANDLE(vk_device, device, _device);
VK_FROM_HANDLE(vk_pipeline, pipeline, pPipelineInfo->pipeline);
return pipeline->ops->get_executable_properties(device, pipeline,
pExecutableCount,
pProperties);
}
VKAPI_ATTR VkResult VKAPI_CALL
vk_common_GetPipelineExecutableStatisticsKHR(
VkDevice _device,
const VkPipelineExecutableInfoKHR *pExecutableInfo,
uint32_t *pStatisticCount,
VkPipelineExecutableStatisticKHR *pStatistics)
{
VK_FROM_HANDLE(vk_device, device, _device);
VK_FROM_HANDLE(vk_pipeline, pipeline, pExecutableInfo->pipeline);
return pipeline->ops->get_executable_statistics(device, pipeline,
pExecutableInfo->executableIndex,
pStatisticCount, pStatistics);
}
VKAPI_ATTR VkResult VKAPI_CALL
vk_common_GetPipelineExecutableInternalRepresentationsKHR(
VkDevice _device,
const VkPipelineExecutableInfoKHR *pExecutableInfo,
uint32_t *pInternalRepresentationCount,
VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
{
VK_FROM_HANDLE(vk_device, device, _device);
VK_FROM_HANDLE(vk_pipeline, pipeline, pExecutableInfo->pipeline);
return pipeline->ops->get_internal_representations(device, pipeline,
pExecutableInfo->executableIndex,
pInternalRepresentationCount,
pInternalRepresentations);
}
VKAPI_ATTR void VKAPI_CALL
vk_common_CmdBindPipeline(VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint,
VkPipeline _pipeline)
{
VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(vk_pipeline, pipeline, _pipeline);
assert(pipeline->bind_point == pipelineBindPoint);
pipeline->ops->cmd_bind(cmd_buffer, pipeline);
}
static const struct vk_pipeline_cache_object_ops pipeline_shader_cache_ops;
static struct vk_shader *
vk_shader_from_cache_obj(struct vk_pipeline_cache_object *object)
{
assert(object->ops == &pipeline_shader_cache_ops);
return container_of(object, struct vk_shader, pipeline.cache_obj);
}
static bool
vk_pipeline_shader_serialize(struct vk_pipeline_cache_object *object,
struct blob *blob)
{
struct vk_shader *shader = vk_shader_from_cache_obj(object);
struct vk_device *device = shader->base.device;
return shader->ops->serialize(device, shader, blob);
}
static void
vk_shader_init_cache_obj(struct vk_device *device, struct vk_shader *shader,
const void *key_data, size_t key_size)
{
assert(key_size == sizeof(shader->pipeline.cache_key));
memcpy(&shader->pipeline.cache_key, key_data,
sizeof(shader->pipeline.cache_key));
vk_pipeline_cache_object_init(device, &shader->pipeline.cache_obj,
&pipeline_shader_cache_ops,
&shader->pipeline.cache_key,
sizeof(shader->pipeline.cache_key));
}
static struct vk_pipeline_cache_object *
vk_pipeline_shader_deserialize(struct vk_device *device,
const void *key_data, size_t key_size,
struct blob_reader *blob)
{
const struct vk_device_shader_ops *ops = device->shader_ops;
/* TODO: Do we really want to always use the latest version? */
const uint32_t version = device->physical->properties.shaderBinaryVersion;
struct vk_shader *shader;
VkResult result = ops->deserialize(device, blob, version,
&device->alloc, &shader);
if (result != VK_SUCCESS) {
assert(result == VK_ERROR_OUT_OF_HOST_MEMORY);
return NULL;
}
vk_shader_init_cache_obj(device, shader, key_data, key_size);
return &shader->pipeline.cache_obj;
}
static struct vk_pipeline_cache_object *
vk_pipeline_shader_deserialize_cb(struct vk_pipeline_cache *cache,
const void *key_data, size_t key_size,
struct blob_reader *blob)
{
return vk_pipeline_shader_deserialize(cache->base.device,
key_data, key_size, blob);
}
static void
vk_pipeline_shader_destroy(struct vk_device *device,
struct vk_pipeline_cache_object *object)
{
struct vk_shader *shader = vk_shader_from_cache_obj(object);
assert(shader->base.device == device);
vk_shader_destroy(device, shader, &device->alloc);
}
static const struct vk_pipeline_cache_object_ops pipeline_shader_cache_ops = {
.serialize = vk_pipeline_shader_serialize,
.deserialize = vk_pipeline_shader_deserialize_cb,
.destroy = vk_pipeline_shader_destroy,
};
static struct vk_shader *
vk_shader_ref(struct vk_shader *shader)
{
vk_pipeline_cache_object_ref(&shader->pipeline.cache_obj);
return shader;
}
static void
vk_shader_unref(struct vk_device *device, struct vk_shader *shader)
{
vk_pipeline_cache_object_unref(device, &shader->pipeline.cache_obj);
}
PRAGMA_DIAGNOSTIC_PUSH
PRAGMA_DIAGNOSTIC_ERROR(-Wpadded)
struct vk_pipeline_tess_info {
unsigned tcs_vertices_out : 8;
unsigned primitive_mode : 2; /* tess_primitive_mode */
unsigned spacing : 2; /* gl_tess_spacing */
unsigned ccw : 1;
unsigned point_mode : 1;
unsigned _pad : 18;
};
PRAGMA_DIAGNOSTIC_POP
static_assert(sizeof(struct vk_pipeline_tess_info) == 4,
"This struct has no holes");
static void
vk_pipeline_gather_nir_tess_info(const nir_shader *nir,
struct vk_pipeline_tess_info *info)
{
info->tcs_vertices_out = nir->info.tess.tcs_vertices_out;
info->primitive_mode = nir->info.tess._primitive_mode;
info->spacing = nir->info.tess.spacing;
info->ccw = nir->info.tess.ccw;
info->point_mode = nir->info.tess.point_mode;
}
static void
vk_pipeline_replace_nir_tess_info(nir_shader *nir,
const struct vk_pipeline_tess_info *info)
{
nir->info.tess.tcs_vertices_out = info->tcs_vertices_out;
nir->info.tess._primitive_mode = info->primitive_mode;
nir->info.tess.spacing = info->spacing;
nir->info.tess.ccw = info->ccw;
nir->info.tess.point_mode = info->point_mode;
}
static void
vk_pipeline_tess_info_merge(struct vk_pipeline_tess_info *dst,
const struct vk_pipeline_tess_info *src)
{
/* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
*
* "PointMode. Controls generation of points rather than triangles
* or lines. This functionality defaults to disabled, and is
* enabled if either shader stage includes the execution mode.
*
* and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
* PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
* and OutputVertices, it says:
*
* "One mode must be set in at least one of the tessellation
* shader stages."
*
* So, the fields can be set in either the TCS or TES, but they must
* agree if set in both.
*/
assert(dst->tcs_vertices_out == 0 ||
src->tcs_vertices_out == 0 ||
dst->tcs_vertices_out == src->tcs_vertices_out);
dst->tcs_vertices_out |= src->tcs_vertices_out;
static_assert(TESS_SPACING_UNSPECIFIED == 0, "");
assert(dst->spacing == TESS_SPACING_UNSPECIFIED ||
src->spacing == TESS_SPACING_UNSPECIFIED ||
dst->spacing == src->spacing);
dst->spacing |= src->spacing;
static_assert(TESS_PRIMITIVE_UNSPECIFIED == 0, "");
assert(dst->primitive_mode == TESS_PRIMITIVE_UNSPECIFIED ||
src->primitive_mode == TESS_PRIMITIVE_UNSPECIFIED ||
dst->primitive_mode == src->primitive_mode);
dst->primitive_mode |= src->primitive_mode;
dst->ccw |= src->ccw;
dst->point_mode |= src->point_mode;
}
struct vk_pipeline_precomp_shader {
struct vk_pipeline_cache_object cache_obj;
/* Key for this cache_obj in the pipeline cache.
*/
blake3_hash cache_key;
mesa_shader_stage stage;
struct vk_pipeline_robustness_state rs;
/* Tessellation info if the shader is a tessellation shader */
struct vk_pipeline_tess_info tess;
struct blob nir_blob;
};
static struct vk_pipeline_precomp_shader *
vk_pipeline_precomp_shader_ref(struct vk_pipeline_precomp_shader *shader)
{
vk_pipeline_cache_object_ref(&shader->cache_obj);
return shader;
}
static void
vk_pipeline_precomp_shader_unref(struct vk_device *device,
struct vk_pipeline_precomp_shader *shader)
{
vk_pipeline_cache_object_unref(device, &shader->cache_obj);
}
static const struct vk_pipeline_cache_object_ops pipeline_precomp_shader_cache_ops;
static struct vk_pipeline_precomp_shader *
vk_pipeline_precomp_shader_from_cache_obj(struct vk_pipeline_cache_object *obj)
{
assert(obj->ops == & pipeline_precomp_shader_cache_ops);
return container_of(obj, struct vk_pipeline_precomp_shader, cache_obj);
}
static struct vk_pipeline_precomp_shader *
vk_pipeline_precomp_shader_create(struct vk_device *device,
const void *key_data, size_t key_size,
const struct vk_pipeline_robustness_state *rs,
nir_shader *nir)
{
struct blob blob;
blob_init(&blob);
nir_serialize(&blob, nir, false);
if (blob.out_of_memory)
goto fail_blob;
struct vk_pipeline_precomp_shader *shader =
vk_zalloc(&device->alloc, sizeof(*shader), 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (shader == NULL)
goto fail_blob;
assert(sizeof(shader->cache_key) == key_size);
memcpy(shader->cache_key, key_data, sizeof(shader->cache_key));
vk_pipeline_cache_object_init(device, &shader->cache_obj,
&pipeline_precomp_shader_cache_ops,
shader->cache_key,
sizeof(shader->cache_key));
shader->stage = nir->info.stage;
shader->rs = *rs;
vk_pipeline_gather_nir_tess_info(nir, &shader->tess);
shader->nir_blob = blob;
return shader;
fail_blob:
blob_finish(&blob);
return NULL;
}
static bool
vk_pipeline_precomp_shader_serialize(struct vk_pipeline_cache_object *obj,
struct blob *blob)
{
struct vk_pipeline_precomp_shader *shader =
vk_pipeline_precomp_shader_from_cache_obj(obj);
blob_write_uint32(blob, shader->stage);
blob_write_bytes(blob, &shader->rs, sizeof(shader->rs));
blob_write_bytes(blob, &shader->tess, sizeof(shader->tess));
blob_write_uint64(blob, shader->nir_blob.size);
blob_write_bytes(blob, shader->nir_blob.data, shader->nir_blob.size);
return !blob->out_of_memory;
}
static struct vk_pipeline_cache_object *
vk_pipeline_precomp_shader_deserialize(struct vk_device *device,
const void *key_data, size_t key_size,
struct blob_reader *blob)
{
struct vk_pipeline_precomp_shader *shader =
vk_zalloc(&device->alloc, sizeof(*shader), 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (shader == NULL)
return NULL;
assert(sizeof(shader->cache_key) == key_size);
memcpy(shader->cache_key, key_data, sizeof(shader->cache_key));
vk_pipeline_cache_object_init(device, &shader->cache_obj,
&pipeline_precomp_shader_cache_ops,
shader->cache_key,
sizeof(shader->cache_key));
shader->stage = blob_read_uint32(blob);
blob_copy_bytes(blob, &shader->rs, sizeof(shader->rs));
blob_copy_bytes(blob, &shader->tess, sizeof(shader->tess));
uint64_t nir_size = blob_read_uint64(blob);
if (blob->overrun || nir_size > SIZE_MAX)
goto fail_shader;
const void *nir_data = blob_read_bytes(blob, nir_size);
if (blob->overrun)
goto fail_shader;
blob_init(&shader->nir_blob);
blob_write_bytes(&shader->nir_blob, nir_data, nir_size);
if (shader->nir_blob.out_of_memory)
goto fail_nir_blob;
return &shader->cache_obj;
fail_nir_blob:
blob_finish(&shader->nir_blob);
fail_shader:
vk_pipeline_cache_object_finish(&shader->cache_obj);
vk_free(&device->alloc, shader);
return NULL;
}
static struct vk_pipeline_cache_object *
vk_pipeline_precomp_shader_deserialize_cb(struct vk_pipeline_cache *cache,
const void *key_data, size_t key_size,
struct blob_reader *blob)
{
return vk_pipeline_precomp_shader_deserialize(cache->base.device,
key_data, key_size, blob);
}
static void
vk_pipeline_precomp_shader_destroy(struct vk_device *device,
struct vk_pipeline_cache_object *obj)
{
struct vk_pipeline_precomp_shader *shader =
vk_pipeline_precomp_shader_from_cache_obj(obj);
blob_finish(&shader->nir_blob);
vk_pipeline_cache_object_finish(&shader->cache_obj);
vk_free(&device->alloc, shader);
}
static nir_shader *
vk_pipeline_precomp_shader_get_nir(const struct vk_pipeline_precomp_shader *shader,
const struct nir_shader_compiler_options *nir_options)
{
struct blob_reader blob;
blob_reader_init(&blob, shader->nir_blob.data, shader->nir_blob.size);
nir_shader *nir = nir_deserialize(NULL, nir_options, &blob);
if (blob.overrun) {
ralloc_free(nir);
return NULL;
}
return nir;
}
static const struct vk_pipeline_cache_object_ops pipeline_precomp_shader_cache_ops = {
.serialize = vk_pipeline_precomp_shader_serialize,
.deserialize = vk_pipeline_precomp_shader_deserialize_cb,
.destroy = vk_pipeline_precomp_shader_destroy,
};
struct vk_pipeline_stage {
mesa_shader_stage stage;
/* Whether the shader was linked with others (RT pipelines only) */
bool linked:1;
/* Whether the shader was imported from a library (Gfx pipelines only) */
bool imported:1;
/* Hash used to lookup the precomp */
blake3_hash precomp_key;
struct vk_pipeline_precomp_shader *precomp;
/* Hash used to lookup the shader */
blake3_hash shader_key;
struct vk_shader *shader;
};
static void
vk_pipeline_hash_precomp_shader_stage(struct vk_device *device,
VkPipelineCreateFlags2KHR pipeline_flags,
const void *pipeline_info_pNext,
const VkPipelineShaderStageCreateInfo *info,
struct vk_pipeline_stage *stage)
{
struct vk_pipeline_robustness_state rs;
vk_pipeline_robustness_state_fill(device, &rs, pipeline_info_pNext,
info->pNext);
vk_pipeline_hash_shader_stage_blake3(pipeline_flags, info,
&rs, stage->precomp_key);
}
static VkResult
vk_pipeline_precompile_shader(struct vk_device *device,
struct vk_pipeline_cache *cache,
VkPipelineCreateFlags2KHR pipeline_flags,
const void *pipeline_info_pNext,
const VkPipelineShaderStageCreateInfo *info,
struct vk_pipeline_stage *stage)
{
const struct vk_device_shader_ops *ops = device->shader_ops;
VkResult result;
if (cache != NULL) {
struct vk_pipeline_cache_object *cache_obj =
vk_pipeline_cache_lookup_object(cache,
stage->precomp_key,
sizeof(stage->precomp_key),
&pipeline_precomp_shader_cache_ops,
NULL /* cache_hit */);
if (cache_obj != NULL) {
stage->precomp = vk_pipeline_precomp_shader_from_cache_obj(cache_obj);
return VK_SUCCESS;
}
}
if (pipeline_flags &
VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR)
return VK_PIPELINE_COMPILE_REQUIRED;
struct vk_pipeline_robustness_state rs;
vk_pipeline_robustness_state_fill(device, &rs,
pipeline_info_pNext,
info->pNext);
const struct nir_shader_compiler_options *nir_options =
ops->get_nir_options(device->physical, stage->stage, &rs);
const struct spirv_to_nir_options spirv_options =
ops->get_spirv_options(device->physical, stage->stage, &rs);
nir_shader *nir;
result = vk_pipeline_shader_stage_to_nir(device, pipeline_flags, info,
&spirv_options, nir_options,
NULL, &nir);
if (result != VK_SUCCESS)
return result;
if (ops->preprocess_nir != NULL)
ops->preprocess_nir(device->physical, nir, &rs);
stage->precomp =
vk_pipeline_precomp_shader_create(device, stage->precomp_key,
sizeof(stage->precomp_key),
&rs, nir);
ralloc_free(nir);
if (stage->precomp == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
if (cache != NULL) {
struct vk_pipeline_cache_object *cache_obj = &stage->precomp->cache_obj;
cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);
stage->precomp = vk_pipeline_precomp_shader_from_cache_obj(cache_obj);
}
return VK_SUCCESS;
}
static VkResult
vk_pipeline_load_precomp_from_binary(struct vk_device *device,
struct vk_pipeline_stage *stage,
struct vk_pipeline_binary *binary)
{
struct vk_pipeline_cache_object *cache_obj;
if (device->mem_cache) {
cache_obj = vk_pipeline_cache_create_and_insert_object(
device->mem_cache,
binary->key, sizeof(binary->key),
binary->data, binary->size,
&pipeline_precomp_shader_cache_ops);
} else {
struct blob_reader reader;
blob_reader_init(&reader, binary->data, binary->size);
cache_obj = vk_pipeline_precomp_shader_deserialize(
device, binary->key, sizeof(binary->key), &reader);
}
if (cache_obj == NULL)
return vk_error(device, VK_ERROR_UNKNOWN);
stage->precomp = vk_pipeline_precomp_shader_from_cache_obj(cache_obj);
memcpy(stage->precomp_key, stage->precomp->cache_key,
sizeof(stage->precomp_key));
return VK_SUCCESS;
}
static VkResult
vk_pipeline_load_shader_from_binary(struct vk_device *device,
struct vk_pipeline_stage *stage,
struct vk_pipeline_binary *binary)
{
struct vk_pipeline_cache_object *cache_obj;
if (device->mem_cache) {
cache_obj = vk_pipeline_cache_create_and_insert_object(
device->mem_cache,
binary->key, sizeof(binary->key),
binary->data, binary->size,
&pipeline_shader_cache_ops);
} else {
struct blob_reader reader;
blob_reader_init(&reader, binary->data, binary->size);
cache_obj = vk_pipeline_shader_deserialize(
device, binary->key, sizeof(binary->key), &reader);
}
if (cache_obj == NULL)
return vk_error(device, VK_ERROR_UNKNOWN);
stage->shader = vk_shader_from_cache_obj(cache_obj);
memcpy(stage->shader_key, stage->shader->pipeline.cache_key,
sizeof(stage->shader_key));
return VK_SUCCESS;
}
static int
cmp_vk_pipeline_stages(const void *_a, const void *_b)
{
const struct vk_pipeline_stage *a = _a, *b = _b;
return vk_shader_cmp_graphics_stages(a->stage, b->stage);
}
static bool
vk_pipeline_stage_is_null(const struct vk_pipeline_stage *stage)
{
return stage->precomp == NULL && stage->shader == NULL;
}
static void
vk_pipeline_stage_finish(struct vk_device *device,
struct vk_pipeline_stage *stage)
{
if (stage->precomp != NULL)
vk_pipeline_precomp_shader_unref(device, stage->precomp);
if (stage->shader)
vk_shader_unref(device, stage->shader);
}
static struct vk_pipeline_stage
vk_pipeline_stage_clone(const struct vk_pipeline_stage *in)
{
struct vk_pipeline_stage out = *in;
if (in->precomp)
out.precomp = vk_pipeline_precomp_shader_ref(in->precomp);
if (in->shader)
out.shader = vk_shader_ref(in->shader);
return out;
}
static const VkPushConstantRange *
get_push_range_for_stage(struct vk_pipeline_layout *pipeline_layout,
mesa_shader_stage stage)
{
const VkShaderStageFlags vk_stage = mesa_to_vk_shader_stage(stage);
const VkPushConstantRange *push_range = NULL;
if (pipeline_layout != NULL) {
for (uint32_t r = 0; r < pipeline_layout->push_range_count; r++) {
if (pipeline_layout->push_ranges[r].stageFlags & vk_stage) {
assert(push_range == NULL);
push_range = &pipeline_layout->push_ranges[r];
}
}
}
return push_range;
}
struct vk_graphics_pipeline {
struct vk_pipeline base;
union {
struct {
struct vk_graphics_pipeline_all_state all_state;
struct vk_graphics_pipeline_state state;
} lib;
struct {
struct vk_vertex_input_state _dynamic_vi;
struct vk_sample_locations_state _dynamic_sl;
struct vk_dynamic_graphics_state dynamic;
} linked;
};
uint32_t set_layout_count;
struct vk_descriptor_set_layout *set_layouts[MESA_VK_MAX_DESCRIPTOR_SETS];
uint32_t stage_count;
struct vk_pipeline_stage stages[MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES];
};
static void
vk_graphics_pipeline_destroy(struct vk_device *device,
struct vk_pipeline *pipeline,
const VkAllocationCallbacks *pAllocator)
{
struct vk_graphics_pipeline *gfx_pipeline =
container_of(pipeline, struct vk_graphics_pipeline, base);
for (uint32_t i = 0; i < gfx_pipeline->stage_count; i++)
vk_pipeline_stage_finish(device, &gfx_pipeline->stages[i]);
for (uint32_t i = 0; i < gfx_pipeline->set_layout_count; i++) {
if (gfx_pipeline->set_layouts[i] != NULL)
vk_descriptor_set_layout_unref(device, gfx_pipeline->set_layouts[i]);
}
vk_pipeline_free(device, pAllocator, pipeline);
}
static bool
vk_device_supports_stage(struct vk_device *device,
mesa_shader_stage stage)
{
const struct vk_features *features = &device->physical->supported_features;
switch (stage) {
case MESA_SHADER_VERTEX:
case MESA_SHADER_FRAGMENT:
case MESA_SHADER_COMPUTE:
return true;
case MESA_SHADER_TESS_CTRL:
case MESA_SHADER_TESS_EVAL:
return features->tessellationShader;
case MESA_SHADER_GEOMETRY:
return features->geometryShader;
case MESA_SHADER_TASK:
return features->taskShader;
case MESA_SHADER_MESH:
return features->meshShader;
default:
return false;
}
}
static const mesa_shader_stage all_gfx_stages[] = {
MESA_SHADER_VERTEX,
MESA_SHADER_TESS_CTRL,
MESA_SHADER_TESS_EVAL,
MESA_SHADER_GEOMETRY,
MESA_SHADER_TASK,
MESA_SHADER_MESH,
MESA_SHADER_FRAGMENT,
};
static void
vk_graphics_pipeline_cmd_bind(struct vk_command_buffer *cmd_buffer,
struct vk_pipeline *pipeline)
{
struct vk_device *device = cmd_buffer->base.device;
const struct vk_device_shader_ops *ops = device->shader_ops;
struct vk_graphics_pipeline *gfx_pipeline = NULL;
struct vk_shader *stage_shader[MESA_SHADER_MESH_STAGES] = { NULL, };
if (pipeline != NULL) {
assert(pipeline->bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS);
assert(!(pipeline->flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR));
gfx_pipeline = container_of(pipeline, struct vk_graphics_pipeline, base);
for (uint32_t i = 0; i < gfx_pipeline->stage_count; i++) {
struct vk_shader *shader = gfx_pipeline->stages[i].shader;
stage_shader[shader->stage] = shader;
}
}
uint32_t stage_count = 0;
mesa_shader_stage stages[ARRAY_SIZE(all_gfx_stages)];
struct vk_shader *shaders[ARRAY_SIZE(all_gfx_stages)];
VkShaderStageFlags vk_stages = 0;
for (uint32_t i = 0; i < ARRAY_SIZE(all_gfx_stages); i++) {
mesa_shader_stage stage = all_gfx_stages[i];
if (!vk_device_supports_stage(device, stage)) {
assert(stage_shader[stage] == NULL);
continue;
}
vk_stages |= mesa_to_vk_shader_stage(stage);
stages[stage_count] = stage;
shaders[stage_count] = stage_shader[stage];
stage_count++;
}
ops->cmd_bind_shaders(cmd_buffer, stage_count, stages, shaders);
if (gfx_pipeline != NULL) {
cmd_buffer->pipeline_shader_stages |= vk_stages;
ops->cmd_set_dynamic_graphics_state(cmd_buffer,
&gfx_pipeline->linked.dynamic);
} else {
cmd_buffer->pipeline_shader_stages &= ~vk_stages;
}
}
static VkShaderCreateFlagsEXT
vk_pipeline_to_shader_flags(VkPipelineCreateFlags2KHR pipeline_flags,
mesa_shader_stage stage)
{
VkShaderCreateFlagsEXT shader_flags = 0;
if (pipeline_flags & VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)
shader_flags |= VK_SHADER_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_MESA;
if (pipeline_flags & VK_PIPELINE_CREATE_2_INDIRECT_BINDABLE_BIT_EXT)
shader_flags |= VK_SHADER_CREATE_INDIRECT_BINDABLE_BIT_EXT;
if (stage == MESA_SHADER_FRAGMENT) {
if (pipeline_flags & VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR)
shader_flags |= VK_SHADER_CREATE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_EXT;
if (pipeline_flags & VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT)
shader_flags |= VK_SHADER_CREATE_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT;
}
if (stage == MESA_SHADER_COMPUTE) {
if (pipeline_flags & VK_PIPELINE_CREATE_2_DISPATCH_BASE_BIT_KHR)
shader_flags |= VK_SHADER_CREATE_DISPATCH_BASE_BIT_EXT;
if (pipeline_flags & VK_PIPELINE_CREATE_2_UNALIGNED_DISPATCH_BIT_MESA)
shader_flags |= VK_SHADER_CREATE_UNALIGNED_DISPATCH_BIT_MESA;
}
return shader_flags;
}
struct vk_graphics_pipeline_compile_info {
/* Compacted array of stages */
struct vk_pipeline_stage stages[MESA_SHADER_MESH_STAGES];
uint32_t stage_count;
/* Maps gl_shader_stage to the matching index in stages[] */
uint32_t stage_to_index[MESA_SHADER_MESH_STAGES];
uint32_t set_layout_count;
struct vk_descriptor_set_layout *set_layouts[MESA_VK_MAX_DESCRIPTOR_SETS];
struct vk_graphics_pipeline_state *state;
bool retain_precomp;
bool optimize;
uint32_t part_count;
uint32_t partition[MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES + 1];
VkShaderStageFlags part_stages[MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES];
};
/* Compute all the state necessary for compilation, this includes precomp
* shader hashes, final shader hashes and all the state necessary.
*/
static void
vk_get_graphics_pipeline_compile_info(struct vk_graphics_pipeline_compile_info *info,
struct vk_device *device,
struct vk_graphics_pipeline_state *state,
struct vk_graphics_pipeline_all_state *all_state,
const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, pCreateInfo->layout);
memset(info, 0, sizeof(*info));
info->state = state;
const VkPipelineCreateFlags2KHR pipeline_flags =
vk_graphics_pipeline_create_flags(pCreateInfo);
info->retain_precomp =
(pipeline_flags &
VK_PIPELINE_CREATE_2_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT) != 0;
const VkPipelineBinaryInfoKHR *bin_info =
vk_find_struct_const(pCreateInfo->pNext,
PIPELINE_BINARY_INFO_KHR);
const VkPipelineLibraryCreateInfoKHR *libs_info =
vk_find_struct_const(pCreateInfo->pNext,
PIPELINE_LIBRARY_CREATE_INFO_KHR);
VkShaderStageFlags all_stages = 0;
if (libs_info) {
for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
VK_FROM_HANDLE(vk_pipeline, lib_pipeline, libs_info->pLibraries[i]);
assert(lib_pipeline->bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS);
assert(lib_pipeline->flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR);
struct vk_graphics_pipeline *lib_gfx_pipeline =
container_of(lib_pipeline, struct vk_graphics_pipeline, base);
vk_graphics_pipeline_state_merge(info->state, &lib_gfx_pipeline->lib.state);
info->set_layout_count = MAX2(info->set_layout_count,
lib_gfx_pipeline->set_layout_count);
for (uint32_t i = 0; i < lib_gfx_pipeline->set_layout_count; i++) {
if (lib_gfx_pipeline->set_layouts[i] == NULL)
continue;
if (info->set_layouts[i] == NULL)
info->set_layouts[i] = lib_gfx_pipeline->set_layouts[i];
}
for (uint32_t i = 0; i < lib_gfx_pipeline->stage_count; i++) {
const struct vk_pipeline_stage *lib_stage =
&lib_gfx_pipeline->stages[i];
/* We shouldn't have duplicated stages in the imported pipeline
* but it's cheap enough to protect against it so we may as well.
*/
assert(lib_stage->stage < ARRAY_SIZE(info->stages));
assert(vk_pipeline_stage_is_null(&info->stages[lib_stage->stage]));
if (!vk_pipeline_stage_is_null(&info->stages[lib_stage->stage]))
continue;
info->stages[lib_stage->stage] = vk_pipeline_stage_clone(lib_stage);
info->stages[lib_stage->stage].imported = true;
all_stages |= mesa_to_vk_shader_stage(lib_stage->stage);
}
}
}
if (pipeline_layout != NULL) {
info->set_layout_count = MAX2(info->set_layout_count,
pipeline_layout->set_count);
for (uint32_t i = 0; i < pipeline_layout->set_count; i++) {
if (pipeline_layout->set_layouts[i] == NULL)
continue;
if (info->set_layouts[i] == NULL)
info->set_layouts[i] = pipeline_layout->set_layouts[i];
}
}
VkResult result = vk_graphics_pipeline_state_fill(device, info->state,
pCreateInfo,
NULL /* driver_rp */,
0 /* driver_rp_flags */,
all_state,
NULL, 0, NULL);
/* We provide a all_state so there should not be any allocation, hence no failure.*/
assert(result == VK_SUCCESS);
for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
const VkPipelineShaderStageCreateInfo *stage_info =
&pCreateInfo->pStages[i];
assert(util_bitcount(stage_info->stage) == 1);
if (!(info->state->shader_stages & stage_info->stage))
continue;
mesa_shader_stage stage = vk_to_mesa_shader_stage(stage_info->stage);
assert(vk_device_supports_stage(device, stage));
/* We don't need to load anything for imported stages, precomp should be
* included if
* VK_PIPELINE_CREATE_2_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT was
* provided and shader should obviously be there.
*/
if (info->stages[stage].imported)
continue;
info->stages[stage] = (struct vk_pipeline_stage) {
.stage = stage,
};
all_stages |= stage_info->stage;
/*
* "If a VkPipelineBinaryInfoKHR structure with a binaryCount greater
* than 0 is included in the pNext chain of any Vk*PipelineCreateInfo
* structure when creating a pipeline, implementations must use the
* data in pPipelineBinaries instead of recalculating it. Any shader
* module identifiers or shader modules declared in
* VkPipelineShaderStageCreateInfo instances are ignored."
*/
if (bin_info != NULL && bin_info->binaryCount > 0)
continue;
vk_pipeline_hash_precomp_shader_stage(device, pipeline_flags,
pCreateInfo->pNext,
stage_info, &info->stages[stage]);
}
/* Compact the array of stages */
info->stage_count = 0;
for (uint32_t s = 0; s < ARRAY_SIZE(info->stages); s++) {
assert(s >= info->stage_count);
if (all_stages & mesa_to_vk_shader_stage(s))
info->stages[info->stage_count++] = info->stages[s];
}
for (uint32_t s = info->stage_count; s < ARRAY_SIZE(info->stages); s++)
memset(&info->stages[s], 0, sizeof(info->stages[s]));
/* Sort so we always give the driver shaders in order.
*
* This makes everything easier for everyone. This also helps stabilize
* shader keys so that we get a cache hit even if the client gives us the
* stages in a different order.
*/
qsort(info->stages, info->stage_count,
sizeof(info->stages[0]), cmp_vk_pipeline_stages);
for (uint32_t s = 0; s < info->stage_count; s++)
info->stage_to_index[info->stages[s].stage] = s;
/* Decide whether we should apply link-time optimizations. The spec says:
*
* VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT specifies that
* pipeline libraries being linked into this library should have link time
* optimizations applied. If this bit is omitted, implementations should
* instead perform linking as rapidly as possible.
*
* ...
*
* Using VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT (or not) when
* linking pipeline libraries is intended as a performance tradeoff
* between host and device. If the bit is omitted, linking should be
* faster and produce a pipeline more rapidly, but performance of the
* pipeline on the target device may be reduced. If the bit is included,
* linking may be slower but should produce a pipeline with device
* performance comparable to a monolithically created pipeline.
*
* The key phrase here is "pipeline libraries". When we are linking pipeline
* libraries, we look at this bit to determine whether to apply link-time
* optimizations. When there are not pipeline libraries, however, we are
* compiling a monolithic pipeline, which the last sentence implies should
* always have link-time optimizations applied.
*
* Summarizing, we want to link-time optimize monolithic pipelines and
* non-monolithic pipelines with LINK_TIME_OPTIMIZATION_BIT.
*
* (Strictly speaking, there's a corner case here, where a pipeline without
* LINK_TIME_OPTIMIZATION_BIT links pipeline libraries for graphics state but
* supplies shaders directly outside of the pipeline library. This logic does
* not link those shaders, which is a conservative choice. GPL is a disaster
* of combinatoric complexity, and this simplified approach gets good
* performance for the cases that actually matter: monolithic, GPL fast link,
* GPL optimized link.)
*/
info->optimize =
libs_info == NULL ||
(pipeline_flags &
VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT);
/* Partition the shaders. Whenever pipelines are used,
* vertex/geometry/fragment stages are always specified together, so should
* always be linked. That doesn't break the fast link since the relevant
* link happens at pipeline library create time.
*
* We don't gate this behind an option since linking shaders is beneficial
* on all hardware, to clean up the I/O mess that applications regularly
* leave.
*/
if (info->stage_count == 0) {
info->part_count = 0;
} else if (info->optimize) {
info->partition[1] = info->stage_count;
info->part_count = 1;
} else if (info->stages[0].stage == MESA_SHADER_FRAGMENT) {
assert(info->stage_count == 1);
info->partition[1] = info->stage_count;
info->part_count = 1;
} else if (info->stages[info->stage_count - 1].stage == MESA_SHADER_FRAGMENT) {
/* In this case we have both geometry stages and fragment */
assert(info->stage_count > 1);
info->partition[1] = info->stage_count - 1;
info->partition[2] = info->stage_count;
info->part_count = 2;
} else {
/* In this case we only have geometry stages */
info->partition[1] = info->stage_count;
info->part_count = 1;
}
for (uint32_t i = 0; i < info->part_count; i++) {
for (uint32_t j = info->partition[i]; j < info->partition[i + 1]; j++) {
const struct vk_pipeline_stage *stage = &info->stages[j];
info->part_stages[i] |= mesa_to_vk_shader_stage(stage->stage);
}
}
struct mesa_blake3 blake3_ctx;
_mesa_blake3_init(&blake3_ctx);
for (uint32_t i = 0; i < info->set_layout_count; i++) {
if (info->set_layouts[i] != NULL) {
_mesa_blake3_update(&blake3_ctx, info->set_layouts[i]->blake3,
sizeof(info->set_layouts[i]->blake3));
}
}
if (pipeline_layout != NULL) {
_mesa_blake3_update(&blake3_ctx, &pipeline_layout->push_ranges,
sizeof(pipeline_layout->push_ranges[0]) *
pipeline_layout->push_range_count);
}
blake3_hash layout_blake3;
_mesa_blake3_final(&blake3_ctx, layout_blake3);
const struct vk_device_shader_ops *ops = device->shader_ops;
for (uint32_t p = 0; p < info->part_count; p++) {
/* Don't try to re-compile any fast-link shaders */
if (!info->optimize && info->stages[info->partition[p]].shader != NULL)
continue;
_mesa_blake3_init(&blake3_ctx);
for (uint32_t i = info->partition[p]; i < info->partition[p + 1]; i++) {
const struct vk_pipeline_stage *stage = &info->stages[i];
_mesa_blake3_update(&blake3_ctx, stage->precomp_key,
sizeof(stage->precomp_key));
VkShaderCreateFlagsEXT shader_flags =
vk_pipeline_to_shader_flags(pipeline_flags, stage->stage);
_mesa_blake3_update(&blake3_ctx, &shader_flags, sizeof(shader_flags));
}
blake3_hash state_blake3;
ops->hash_state(device->physical, info->state,
&device->enabled_features, info->part_stages[p],
state_blake3);
_mesa_blake3_update(&blake3_ctx, state_blake3, sizeof(state_blake3));
_mesa_blake3_update(&blake3_ctx, layout_blake3, sizeof(layout_blake3));
blake3_hash linked_blake3;
_mesa_blake3_final(&blake3_ctx, linked_blake3);
for (uint32_t i = info->partition[p]; i < info->partition[p + 1]; i++) {
struct vk_pipeline_stage *stage = &info->stages[i];
/* Make the per-stage key unique by hashing in the stage */
_mesa_blake3_init(&blake3_ctx);
_mesa_blake3_update(&blake3_ctx, &stage->stage, sizeof(stage->stage));
_mesa_blake3_update(&blake3_ctx, linked_blake3, sizeof(linked_blake3));
_mesa_blake3_final(&blake3_ctx, stage->shader_key);
}
}
}
static void
vk_release_graphics_pipeline_compile_info(struct vk_graphics_pipeline_compile_info *info,
struct vk_device *device,
const VkAllocationCallbacks *pAllocator)
{
for (uint32_t i = 0; i < ARRAY_SIZE(info->stages); i++)
vk_pipeline_stage_finish(device, &info->stages[i]);
}
static VkResult
vk_graphics_pipeline_compile_shaders(struct vk_device *device,
struct vk_pipeline_cache *cache,
VkPipelineCreateFlags2KHR pipeline_flags,
struct vk_pipeline_layout *pipeline_layout,
struct vk_graphics_pipeline_compile_info *compile_info,
VkPipelineCreationFeedback *stage_feedbacks)
{
const struct vk_device_shader_ops *ops = device->shader_ops;
VkResult result;
if (compile_info->stage_count == 0)
return VK_SUCCESS;
/* If we're linking, throw away any previously compiled shaders as they
* likely haven't been properly linked. We keep the precompiled shaders
* and we still look it up in the cache so it may still be fast.
*/
if (compile_info->optimize) {
for (uint32_t i = 0; i < compile_info->stage_count; i++) {
if (compile_info->stages[i].shader != NULL) {
vk_shader_unref(device, compile_info->stages[i].shader);
compile_info->stages[i].shader = NULL;
}
}
}
bool have_all_shaders = true;
VkShaderStageFlags all_stages = 0;
struct vk_pipeline_precomp_shader *tcs_precomp = NULL, *tes_precomp = NULL;
for (uint32_t i = 0; i < compile_info->stage_count; i++) {
all_stages |= mesa_to_vk_shader_stage(compile_info->stages[i].stage);
if (compile_info->stages[i].shader == NULL)
have_all_shaders = false;
if (compile_info->stages[i].stage == MESA_SHADER_TESS_CTRL)
tcs_precomp = compile_info->stages[i].precomp;
if (compile_info->stages[i].stage == MESA_SHADER_TESS_EVAL)
tes_precomp = compile_info->stages[i].precomp;
}
/* If we already have a shader for each stage, there's nothing to do. */
if (have_all_shaders)
return VK_SUCCESS;
struct vk_pipeline_tess_info tess_info = { ._pad = 0 };
if (tcs_precomp != NULL && tes_precomp != NULL) {
tess_info = tcs_precomp->tess;
vk_pipeline_tess_info_merge(&tess_info, &tes_precomp->tess);
}
for (uint32_t p = 0; p < compile_info->part_count; p++) {
const int64_t part_start = os_time_get_nano();
/* Don't try to re-compile any fast-link shaders */
if (!compile_info->optimize &&
compile_info->stages[compile_info->partition[p]].shader != NULL)
continue;
if (cache != NULL) {
/* From the Vulkan 1.3.278 spec:
*
* "VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT
* indicates that a readily usable pipeline or pipeline stage was
* found in the pipelineCache specified by the application in the
* pipeline creation command.
*
* [...]
*
* Note
*
* Implementations are encouraged to provide a meaningful signal
* to applications using this bit. The intention is to communicate
* to the application that the pipeline or pipeline stage was
* created “as fast as it gets” using the pipeline cache provided
* by the application. If an implementation uses an internal
* cache, it is discouraged from setting this bit as the feedback
* would be unactionable."
*
* The cache_hit value returned by vk_pipeline_cache_lookup_object()
* is only set to true when the shader is found in the provided
* pipeline cache. It is left false if we fail to find it in the
* memory cache but find it in the disk cache even though that's
* still a cache hit from the perspective of the compile pipeline.
*/
bool all_shaders_found = true;
bool all_cache_hits = true;
for (uint32_t i = compile_info->partition[p]; i < compile_info->partition[p + 1]; i++) {
struct vk_pipeline_stage *stage = &compile_info->stages[i];
if (stage->shader) {
/* If we have a shader from some library pipeline and the key
* matches, just use that.
*/
if (memcmp(&stage->shader->pipeline.cache_key,
&stage->shader_key, sizeof(stage->shader_key)) == 0)
continue;
/* Otherwise, throw it away */
vk_shader_unref(device, stage->shader);
stage->shader = NULL;
}
bool cache_hit = false;
struct vk_pipeline_cache_object *cache_obj =
vk_pipeline_cache_lookup_object(cache, &stage->shader_key,
sizeof(stage->shader_key),
&pipeline_shader_cache_ops,
&cache_hit);
if (cache_obj != NULL) {
assert(stage->shader == NULL);
stage->shader = vk_shader_from_cache_obj(cache_obj);
} else {
all_shaders_found = false;
}
if (cache_obj == NULL && !cache_hit)
all_cache_hits = false;
}
if (all_cache_hits && cache != device->mem_cache) {
/* The pipeline cache only really helps if we hit for everything
* in the partition. Otherwise, we have to go re-compile it all
* anyway.
*/
for (uint32_t i = compile_info->partition[p]; i < compile_info->partition[p + 1]; i++) {
struct vk_pipeline_stage *stage = &compile_info->stages[i];
stage_feedbacks[stage->stage].flags |=
VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
}
}
if (all_shaders_found) {
/* Update duration to take cache lookups into account */
const int64_t part_end = os_time_get_nano();
for (uint32_t i = compile_info->partition[p]; i < compile_info->partition[p + 1]; i++) {
struct vk_pipeline_stage *stage = &compile_info->stages[i];
stage_feedbacks[stage->stage].duration += part_end - part_start;
}
continue;
}
}
if (pipeline_flags &
VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR)
return VK_PIPELINE_COMPILE_REQUIRED;
struct vk_shader_compile_info infos[MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES];
for (uint32_t i = compile_info->partition[p]; i < compile_info->partition[p + 1]; i++) {
struct vk_pipeline_stage *stage = &compile_info->stages[i];
VkShaderCreateFlagsEXT shader_flags =
vk_pipeline_to_shader_flags(pipeline_flags, stage->stage);
if (compile_info->partition[p + 1] - compile_info->partition[p] > 1)
shader_flags |= VK_SHADER_CREATE_LINK_STAGE_BIT_EXT;
if ((compile_info->part_stages[p] & VK_SHADER_STAGE_MESH_BIT_EXT) &&
!(all_stages & VK_SHADER_STAGE_TASK_BIT_EXT))
shader_flags = VK_SHADER_CREATE_NO_TASK_SHADER_BIT_EXT;
VkShaderStageFlags next_stage;
if (stage->stage == MESA_SHADER_FRAGMENT) {
next_stage = 0;
} else if (i + 1 < compile_info->stage_count) {
/* We're always linking all the geometry shaders and hashing their
* hashes together, so this is safe.
*/
next_stage = mesa_to_vk_shader_stage(compile_info->stages[i + 1].stage);
} else {
/* We're the last geometry stage */
next_stage = VK_SHADER_STAGE_FRAGMENT_BIT;
}
const struct nir_shader_compiler_options *nir_options =
ops->get_nir_options(device->physical, stage->stage,
&stage->precomp->rs);
nir_shader *nir =
vk_pipeline_precomp_shader_get_nir(stage->precomp, nir_options);
if (nir == NULL) {
for (uint32_t j = compile_info->partition[p]; j < i; j++)
ralloc_free(infos[i].nir);
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
if (stage->stage == MESA_SHADER_TESS_CTRL ||
stage->stage == MESA_SHADER_TESS_EVAL)
vk_pipeline_replace_nir_tess_info(nir, &tess_info);
const VkPushConstantRange *push_range =
get_push_range_for_stage(pipeline_layout, stage->stage);
infos[i] = (struct vk_shader_compile_info) {
.stage = stage->stage,
.flags = shader_flags,
.next_stage_mask = next_stage,
.nir = nir,
.robustness = &stage->precomp->rs,
.set_layout_count = compile_info->set_layout_count,
.set_layouts = compile_info->set_layouts,
.push_constant_range_count = push_range != NULL,
.push_constant_ranges = push_range != NULL ? push_range : NULL,
};
}
/* vk_shader_ops::compile() consumes the NIR regardless of whether or
* not it succeeds and only generates shaders on success. Once this
* returns, we own the shaders but not the NIR in infos.
*/
struct vk_shader *shaders[MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES];
result = vk_compile_shaders(device,
compile_info->partition[p + 1] - compile_info->partition[p],
&infos[compile_info->partition[p]],
compile_info->state, &device->enabled_features,
&device->alloc,
&shaders[compile_info->partition[p]]);
if (result != VK_SUCCESS)
return result;
const int64_t part_end = os_time_get_nano();
for (uint32_t i = compile_info->partition[p]; i < compile_info->partition[p + 1]; i++) {
struct vk_pipeline_stage *stage = &compile_info->stages[i];
vk_shader_init_cache_obj(device, shaders[i], &stage->shader_key,
sizeof(stage->shader_key));
if (stage->shader == NULL) {
struct vk_pipeline_cache_object *cache_obj =
&shaders[i]->pipeline.cache_obj;
if (cache != NULL)
cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);
stage->shader = vk_shader_from_cache_obj(cache_obj);
} else {
/* This can fail to happen if only some of the shaders were found
* in the pipeline cache. In this case, we just throw away the
* shader as vk_pipeline_cache_add_object() would throw it away
* for us anyway.
*/
assert(memcmp(&stage->shader->pipeline.cache_key,
&shaders[i]->pipeline.cache_key,
sizeof(shaders[i]->pipeline.cache_key)) == 0);
vk_shader_unref(device, shaders[i]);
}
stage_feedbacks[stage->stage].duration += part_end - part_start;
}
}
return VK_SUCCESS;
}
static VkResult
vk_graphics_pipeline_get_executable_properties(
struct vk_device *device,
struct vk_pipeline *pipeline,
uint32_t *executable_count,
VkPipelineExecutablePropertiesKHR *properties)
{
struct vk_graphics_pipeline *gfx_pipeline =
container_of(pipeline, struct vk_graphics_pipeline, base);
VkResult result;
if (properties == NULL) {
*executable_count = 0;
for (uint32_t i = 0; i < gfx_pipeline->stage_count; i++) {
struct vk_shader *shader = gfx_pipeline->stages[i].shader;
uint32_t shader_exec_count = 0;
result = shader->ops->get_executable_properties(device, shader,
&shader_exec_count,
NULL);
assert(result == VK_SUCCESS);
*executable_count += shader_exec_count;
}
} else {
uint32_t arr_len = *executable_count;
*executable_count = 0;
for (uint32_t i = 0; i < gfx_pipeline->stage_count; i++) {
struct vk_shader *shader = gfx_pipeline->stages[i].shader;
uint32_t shader_exec_count = arr_len - *executable_count;
result = shader->ops->get_executable_properties(device, shader,
&shader_exec_count,
&properties[*executable_count]);
if (result != VK_SUCCESS)
return result;
*executable_count += shader_exec_count;
}
}
return VK_SUCCESS;
}
static inline struct vk_shader *
vk_graphics_pipeline_executable_shader(struct vk_device *device,
struct vk_graphics_pipeline *gfx_pipeline,
uint32_t *executable_index)
{
for (uint32_t i = 0; i < gfx_pipeline->stage_count; i++) {
struct vk_shader *shader = gfx_pipeline->stages[i].shader;
uint32_t shader_exec_count = 0;
shader->ops->get_executable_properties(device, shader,
&shader_exec_count, NULL);
if (*executable_index < shader_exec_count)
return shader;
else
*executable_index -= shader_exec_count;
}
return NULL;
}
static VkResult
vk_graphics_pipeline_get_executable_statistics(
struct vk_device *device,
struct vk_pipeline *pipeline,
uint32_t executable_index,
uint32_t *statistic_count,
VkPipelineExecutableStatisticKHR *statistics)
{
struct vk_graphics_pipeline *gfx_pipeline =
container_of(pipeline, struct vk_graphics_pipeline, base);
struct vk_shader *shader =
vk_graphics_pipeline_executable_shader(device, gfx_pipeline,
&executable_index);
if (shader == NULL) {
*statistic_count = 0;
return VK_SUCCESS;
}
return shader->ops->get_executable_statistics(device, shader,
executable_index,
statistic_count,
statistics);
}
static VkResult
vk_graphics_pipeline_get_internal_representations(
struct vk_device *device,
struct vk_pipeline *pipeline,
uint32_t executable_index,
uint32_t *internal_representation_count,
VkPipelineExecutableInternalRepresentationKHR* internal_representations)
{
struct vk_graphics_pipeline *gfx_pipeline =
container_of(pipeline, struct vk_graphics_pipeline, base);
struct vk_shader *shader =
vk_graphics_pipeline_executable_shader(device, gfx_pipeline,
&executable_index);
if (shader == NULL) {
*internal_representation_count = 0;
return VK_SUCCESS;
}
return shader->ops->get_executable_internal_representations(
device, shader, executable_index,
internal_representation_count, internal_representations);
}
static struct vk_shader *
vk_graphics_pipeline_get_shader(struct vk_pipeline *pipeline,
mesa_shader_stage stage)
{
struct vk_graphics_pipeline *gfx_pipeline =
container_of(pipeline, struct vk_graphics_pipeline, base);
for (uint32_t i = 0; i < gfx_pipeline->stage_count; i++) {
if (gfx_pipeline->stages[i].stage == stage)
return gfx_pipeline->stages[i].shader;
}
return NULL;
}
static const struct vk_pipeline_ops vk_graphics_pipeline_ops = {
.destroy = vk_graphics_pipeline_destroy,
.get_executable_statistics = vk_graphics_pipeline_get_executable_statistics,
.get_executable_properties = vk_graphics_pipeline_get_executable_properties,
.get_internal_representations = vk_graphics_pipeline_get_internal_representations,
.cmd_bind = vk_graphics_pipeline_cmd_bind,
.get_shader = vk_graphics_pipeline_get_shader,
};
static VkResult
vk_create_graphics_pipeline(struct vk_device *device,
struct vk_pipeline_cache *cache,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkPipeline *pPipeline)
{
VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, pCreateInfo->layout);
const int64_t pipeline_start = os_time_get_nano();
VkResult result;
const VkPipelineCreateFlags2KHR pipeline_flags =
vk_graphics_pipeline_create_flags(pCreateInfo);
const VkPipelineBinaryInfoKHR *bin_info =
vk_find_struct_const(pCreateInfo->pNext,
PIPELINE_BINARY_INFO_KHR);
const VkPipelineCreationFeedbackCreateInfo *feedback_info =
vk_find_struct_const(pCreateInfo->pNext,
PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
struct vk_graphics_pipeline *pipeline =
vk_pipeline_zalloc(device, &vk_graphics_pipeline_ops,
VK_PIPELINE_BIND_POINT_GRAPHICS,
pipeline_flags, pAllocator, sizeof(*pipeline));
if (pipeline == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
VkPipelineCreationFeedback stage_feedbacks[MESA_SHADER_MESH_STAGES];
memset(stage_feedbacks, 0, sizeof(stage_feedbacks));
const bool is_library = pipeline_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR;
struct vk_graphics_pipeline_state state_tmp;
struct vk_graphics_pipeline_all_state all_state_tmp;
if (!is_library)
memset(&state_tmp, 0, sizeof(state_tmp));
struct vk_graphics_pipeline_compile_info compile_info;
vk_get_graphics_pipeline_compile_info(
&compile_info, device,
is_library ? &pipeline->lib.state : &state_tmp,
is_library ? &pipeline->lib.all_state : &all_state_tmp,
pCreateInfo);
/* For pipeline libraries, the state is stored in the pipeline */
if (!(pipeline->base.flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)) {
pipeline->linked.dynamic.vi = &pipeline->linked._dynamic_vi;
pipeline->linked.dynamic.ms.sample_locations =
&pipeline->linked._dynamic_sl;
vk_dynamic_graphics_state_fill(&pipeline->linked.dynamic, &state_tmp);
}
if (bin_info != NULL && bin_info->binaryCount > 0) {
const uint32_t expected_binary_count = compile_info.retain_precomp ?
(2 * pCreateInfo->stageCount) : pCreateInfo->stageCount;
if (bin_info->binaryCount < expected_binary_count) {
result = vk_error(device, VK_ERROR_UNKNOWN);
} else {
uint32_t binary_index = 0;
for (uint32_t i = 0; i < compile_info.stage_count; i++) {
if (compile_info.stages[i].imported)
continue;
const int64_t stage_start = os_time_get_nano();
mesa_shader_stage stage = compile_info.stages[i].stage;
if (compile_info.retain_precomp) {
VK_FROM_HANDLE(vk_pipeline_binary, binary,
bin_info->pPipelineBinaries[binary_index++]);
result = vk_pipeline_load_precomp_from_binary(
device, &compile_info.stages[i], binary);
if (result != VK_SUCCESS)
goto fail_stages;
}
VK_FROM_HANDLE(vk_pipeline_binary, binary,
bin_info->pPipelineBinaries[binary_index++]);
result = vk_pipeline_load_shader_from_binary(
device, &compile_info.stages[i], binary);
if (result != VK_SUCCESS)
goto fail_stages;
stage_feedbacks[stage].flags |=
VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
stage_feedbacks[stage].flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
const int64_t stage_end = os_time_get_nano();
stage_feedbacks[stage].duration += stage_end - stage_start;
}
}
} else {
for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
const VkPipelineShaderStageCreateInfo *stage_info =
&pCreateInfo->pStages[i];
const int64_t stage_start = os_time_get_nano();
assert(util_bitcount(stage_info->stage) == 1);
mesa_shader_stage stage = vk_to_mesa_shader_stage(stage_info->stage);
/* We don't need to load anything for imported stages, precomp should be
* included if
* VK_PIPELINE_CREATE_2_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT was
* provided and shader should obviously be there.
*/
if (compile_info.stages[compile_info.stage_to_index[stage]].imported)
continue;
stage_feedbacks[stage].flags |=
VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
stage_feedbacks[stage].flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
struct vk_pipeline_stage *pipeline_stage =
&compile_info.stages[compile_info.stage_to_index[stage]];
result = vk_pipeline_precompile_shader(device, cache, pipeline_flags,
pCreateInfo->pNext,
stage_info, pipeline_stage);
if (result != VK_SUCCESS)
goto fail_stages;
const int64_t stage_end = os_time_get_nano();
stage_feedbacks[stage].duration += stage_end - stage_start;
}
result = vk_graphics_pipeline_compile_shaders(device, cache,
pipeline_flags,
pipeline_layout,
&compile_info,
stage_feedbacks);
if (result != VK_SUCCESS)
goto fail_stages;
}
/* Keep a reference on the set layouts */
pipeline->set_layout_count = compile_info.set_layout_count;
for (uint32_t i = 0; i < compile_info.set_layout_count; i++) {
if (compile_info.set_layouts[i] == NULL)
continue;
pipeline->set_layouts[i] =
vk_descriptor_set_layout_ref(compile_info.set_layouts[i]);
}
pipeline->stage_count = compile_info.stage_count;
for (uint32_t i = 0; i < compile_info.stage_count; i++) {
pipeline->base.stages |= mesa_to_vk_shader_stage(compile_info.stages[i].stage);
pipeline->stages[i] = vk_pipeline_stage_clone(&compile_info.stages[i]);
}
/* Throw away precompiled shaders unless the client explicitly asks us to
* keep them.
*/
if (!compile_info.retain_precomp) {
for (uint32_t i = 0; i < compile_info.stage_count; i++) {
if (pipeline->stages[i].precomp != NULL) {
vk_pipeline_precomp_shader_unref(device, pipeline->stages[i].precomp);
pipeline->stages[i].precomp = NULL;
}
}
}
const int64_t pipeline_end = os_time_get_nano();
if (feedback_info != NULL) {
VkPipelineCreationFeedback pipeline_feedback = {
.flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
.duration = pipeline_end - pipeline_start,
};
/* From the Vulkan 1.3.275 spec:
*
* "An implementation should set the
* VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT
* bit if it was able to avoid the large majority of pipeline or
* pipeline stage creation work by using the pipelineCache parameter"
*
* We really shouldn't set this bit unless all the shaders hit the
* cache.
*/
uint32_t cache_hit_count = 0;
for (uint32_t i = 0; i < compile_info.stage_count; i++) {
const mesa_shader_stage stage = compile_info.stages[i].stage;
if (stage_feedbacks[stage].flags &
VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT)
cache_hit_count++;
}
if (cache_hit_count > 0 && cache_hit_count == compile_info.stage_count) {
pipeline_feedback.flags |=
VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
}
*feedback_info->pPipelineCreationFeedback = pipeline_feedback;
/* VUID-VkGraphicsPipelineCreateInfo-pipelineStageCreationFeedbackCount-06594 */
assert(feedback_info->pipelineStageCreationFeedbackCount == 0 ||
feedback_info->pipelineStageCreationFeedbackCount ==
pCreateInfo->stageCount);
for (uint32_t i = 0;
i < feedback_info->pipelineStageCreationFeedbackCount; i++) {
const mesa_shader_stage stage =
vk_to_mesa_shader_stage(pCreateInfo->pStages[i].stage);
feedback_info->pPipelineStageCreationFeedbacks[i] =
stage_feedbacks[stage];
}
}
vk_release_graphics_pipeline_compile_info(&compile_info, device, pAllocator);
*pPipeline = vk_pipeline_to_handle(&pipeline->base);
return VK_SUCCESS;
fail_stages:
vk_graphics_pipeline_destroy(device, &pipeline->base, pAllocator);
vk_release_graphics_pipeline_compile_info(&compile_info, device, pAllocator);
return result;
}
VKAPI_ATTR VkResult VKAPI_CALL
vk_common_CreateGraphicsPipelines(VkDevice _device,
VkPipelineCache pipelineCache,
uint32_t createInfoCount,
const VkGraphicsPipelineCreateInfo *pCreateInfos,
const VkAllocationCallbacks *pAllocator,
VkPipeline *pPipelines)
{
VK_FROM_HANDLE(vk_device, device, _device);
VK_FROM_HANDLE(vk_pipeline_cache, cache, pipelineCache);
VkResult first_error_or_success = VK_SUCCESS;
/* Use implicit pipeline cache if there's no cache set */
if (!cache && device->mem_cache)
cache = device->mem_cache;
/* From the Vulkan 1.3.274 spec:
*
* "When attempting to create many pipelines in a single command, it is
* possible that creation may fail for a subset of them. In this case,
* the corresponding elements of pPipelines will be set to
* VK_NULL_HANDLE.
*/
memset(pPipelines, 0, createInfoCount * sizeof(*pPipelines));
unsigned i = 0;
for (; i < createInfoCount; i++) {
VkResult result = vk_create_graphics_pipeline(device, cache,
&pCreateInfos[i],
pAllocator,
&pPipelines[i]);
if (result == VK_SUCCESS)
continue;
if (first_error_or_success == VK_SUCCESS)
first_error_or_success = result;
/* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED as it
* is not obvious what error should be report upon 2 different failures.
*/
if (result != VK_PIPELINE_COMPILE_REQUIRED)
return result;
const VkPipelineCreateFlags2KHR flags =
vk_graphics_pipeline_create_flags(&pCreateInfos[i]);
if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
return result;
}
return first_error_or_success;
}
struct vk_compute_pipeline {
struct vk_pipeline base;
struct vk_pipeline_stage stage;
};
static void
vk_compute_pipeline_destroy(struct vk_device *device,
struct vk_pipeline *pipeline,
const VkAllocationCallbacks *pAllocator)
{
struct vk_compute_pipeline *comp_pipeline =
container_of(pipeline, struct vk_compute_pipeline, base);
vk_pipeline_stage_finish(device, &comp_pipeline->stage);
vk_pipeline_free(device, pAllocator, pipeline);
}
static void
vk_compute_pipeline_cmd_bind(struct vk_command_buffer *cmd_buffer,
struct vk_pipeline *pipeline)
{
struct vk_device *device = cmd_buffer->base.device;
const struct vk_device_shader_ops *ops = device->shader_ops;
struct vk_shader *shader = NULL;
if (pipeline != NULL) {
assert(pipeline->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE);
struct vk_compute_pipeline *comp_pipeline =
container_of(pipeline, struct vk_compute_pipeline, base);
shader = comp_pipeline->stage.shader;
cmd_buffer->pipeline_shader_stages |= VK_SHADER_STAGE_COMPUTE_BIT;
} else {
cmd_buffer->pipeline_shader_stages &= ~VK_SHADER_STAGE_COMPUTE_BIT;
}
mesa_shader_stage stage = MESA_SHADER_COMPUTE;
ops->cmd_bind_shaders(cmd_buffer, 1, &stage, &shader);
}
static void
vk_get_compute_pipeline_compile_info(struct vk_pipeline_stage *stage,
struct vk_device *device,
const VkComputePipelineCreateInfo *pCreateInfo)
{
VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, pCreateInfo->layout);
const struct vk_device_shader_ops *ops = device->shader_ops;
*stage = (struct vk_pipeline_stage) { .stage = MESA_SHADER_COMPUTE, };
const VkPipelineBinaryInfoKHR *bin_info =
vk_find_struct_const(pCreateInfo->pNext,
PIPELINE_BINARY_INFO_KHR);
/*
* "If a VkPipelineBinaryInfoKHR structure with a binaryCount greater
* than 0 is included in the pNext chain of any Vk*PipelineCreateInfo
* structure when creating a pipeline, implementations must use the
* data in pPipelineBinaries instead of recalculating it. Any shader
* module identifiers or shader modules declared in
* VkPipelineShaderStageCreateInfo instances are ignored."
*
* There is no point in computing a precomp/shader hash at this point,
* since we don't have any information.
*/
if (bin_info == NULL || bin_info->binaryCount == 0) {
const VkPushConstantRange *push_range =
get_push_range_for_stage(pipeline_layout, MESA_SHADER_COMPUTE);
const VkPipelineCreateFlags2KHR pipeline_flags =
vk_compute_pipeline_create_flags(pCreateInfo);
const VkShaderCreateFlagsEXT shader_flags =
vk_pipeline_to_shader_flags(pipeline_flags, MESA_SHADER_COMPUTE);
vk_pipeline_hash_precomp_shader_stage(device, pipeline_flags, pCreateInfo->pNext,
&pCreateInfo->stage, stage);
struct mesa_blake3 blake3_ctx;
_mesa_blake3_init(&blake3_ctx);
_mesa_blake3_update(&blake3_ctx, &stage->stage, sizeof(stage->stage));
_mesa_blake3_update(&blake3_ctx, stage->precomp_key,
sizeof(stage->precomp_key));
_mesa_blake3_update(&blake3_ctx, &shader_flags, sizeof(shader_flags));
blake3_hash features_blake3;
ops->hash_state(device->physical, NULL /* state */,
&device->enabled_features, VK_SHADER_STAGE_COMPUTE_BIT,
features_blake3);
_mesa_blake3_update(&blake3_ctx, features_blake3, sizeof(features_blake3));
for (uint32_t i = 0; i < pipeline_layout->set_count; i++) {
if (pipeline_layout->set_layouts[i] != NULL) {
_mesa_blake3_update(&blake3_ctx,
pipeline_layout->set_layouts[i]->blake3,
sizeof(pipeline_layout->set_layouts[i]->blake3));
}
}
if (push_range != NULL)
_mesa_blake3_update(&blake3_ctx, push_range, sizeof(*push_range));
_mesa_blake3_final(&blake3_ctx, stage->shader_key);
}
}
static VkResult
vk_pipeline_compile_compute_stage(struct vk_device *device,
struct vk_pipeline_cache *cache,
struct vk_compute_pipeline *pipeline,
struct vk_pipeline_layout *pipeline_layout,
struct vk_pipeline_stage *stage,
bool *cache_hit)
{
const struct vk_device_shader_ops *ops = device->shader_ops;
VkResult result;
if (cache != NULL) {
struct vk_pipeline_cache_object *cache_obj =
vk_pipeline_cache_lookup_object(cache, stage->shader_key,
sizeof(stage->shader_key),
&pipeline_shader_cache_ops,
cache_hit);
if (cache_obj != NULL) {
stage->shader = vk_shader_from_cache_obj(cache_obj);
return VK_SUCCESS;
}
}
if (pipeline->base.flags &
VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR)
return VK_PIPELINE_COMPILE_REQUIRED;
const struct nir_shader_compiler_options *nir_options =
ops->get_nir_options(device->physical, stage->stage,
&stage->precomp->rs);
nir_shader *nir = vk_pipeline_precomp_shader_get_nir(stage->precomp,
nir_options);
if (nir == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
const VkPushConstantRange *push_range =
get_push_range_for_stage(pipeline_layout, MESA_SHADER_COMPUTE);
VkShaderCreateFlagsEXT shader_flags =
vk_pipeline_to_shader_flags(pipeline->base.flags, MESA_SHADER_COMPUTE);
/* vk_device_shader_ops::compile() consumes the NIR regardless of whether
* or not it succeeds and only generates shaders on success. Once compile()
* returns, we own the shaders but not the NIR in infos.
*/
struct vk_shader_compile_info compile_info = {
.stage = stage->stage,
.flags = shader_flags,
.next_stage_mask = 0,
.nir = nir,
.robustness = &stage->precomp->rs,
.set_layout_count = pipeline_layout->set_count,
.set_layouts = pipeline_layout->set_layouts,
.push_constant_range_count = push_range != NULL,
.push_constant_ranges = push_range != NULL ? push_range : NULL,
};
struct vk_shader *shader;
result = vk_compile_shaders(device, 1, &compile_info,
NULL, &device->enabled_features,
&device->alloc, &shader);
if (result != VK_SUCCESS)
return result;
vk_shader_init_cache_obj(device, shader, &stage->shader_key,
sizeof(stage->shader_key));
struct vk_pipeline_cache_object *cache_obj = &shader->pipeline.cache_obj;
if (cache != NULL)
cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);
stage->shader = vk_shader_from_cache_obj(cache_obj);
return VK_SUCCESS;
}
static VkResult
vk_compute_pipeline_get_executable_properties(
struct vk_device *device,
struct vk_pipeline *pipeline,
uint32_t *executable_count,
VkPipelineExecutablePropertiesKHR *properties)
{
struct vk_compute_pipeline *comp_pipeline =
container_of(pipeline, struct vk_compute_pipeline, base);
struct vk_shader *shader = comp_pipeline->stage.shader;
return shader->ops->get_executable_properties(device, shader,
executable_count,
properties);
}
static VkResult
vk_compute_pipeline_get_executable_statistics(
struct vk_device *device,
struct vk_pipeline *pipeline,
uint32_t executable_index,
uint32_t *statistic_count,
VkPipelineExecutableStatisticKHR *statistics)
{
struct vk_compute_pipeline *comp_pipeline =
container_of(pipeline, struct vk_compute_pipeline, base);
struct vk_shader *shader = comp_pipeline->stage.shader;
return shader->ops->get_executable_statistics(device, shader,
executable_index,
statistic_count,
statistics);
}
static VkResult
vk_compute_pipeline_get_internal_representations(
struct vk_device *device,
struct vk_pipeline *pipeline,
uint32_t executable_index,
uint32_t *internal_representation_count,
VkPipelineExecutableInternalRepresentationKHR* internal_representations)
{
struct vk_compute_pipeline *comp_pipeline =
container_of(pipeline, struct vk_compute_pipeline, base);
struct vk_shader *shader = comp_pipeline->stage.shader;
return shader->ops->get_executable_internal_representations(
device, shader, executable_index,
internal_representation_count, internal_representations);
}
static struct vk_shader *
vk_compute_pipeline_get_shader(struct vk_pipeline *pipeline,
mesa_shader_stage stage)
{
struct vk_compute_pipeline *comp_pipeline =
container_of(pipeline, struct vk_compute_pipeline, base);
assert(stage == MESA_SHADER_COMPUTE);
return comp_pipeline->stage.shader;
}
static const struct vk_pipeline_ops vk_compute_pipeline_ops = {
.destroy = vk_compute_pipeline_destroy,
.get_executable_statistics = vk_compute_pipeline_get_executable_statistics,
.get_executable_properties = vk_compute_pipeline_get_executable_properties,
.get_internal_representations = vk_compute_pipeline_get_internal_representations,
.cmd_bind = vk_compute_pipeline_cmd_bind,
.get_shader = vk_compute_pipeline_get_shader,
};
static VkResult
vk_create_compute_pipeline(struct vk_device *device,
struct vk_pipeline_cache *cache,
const VkComputePipelineCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkPipeline *pPipeline)
{
VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, pCreateInfo->layout);
int64_t pipeline_start = os_time_get_nano();
VkResult result;
struct vk_pipeline_stage stage;
vk_get_compute_pipeline_compile_info(&stage, device, pCreateInfo);
const VkPipelineCreateFlags2KHR pipeline_flags =
vk_compute_pipeline_create_flags(pCreateInfo);
const VkPipelineBinaryInfoKHR *bin_info =
vk_find_struct_const(pCreateInfo->pNext,
PIPELINE_BINARY_INFO_KHR);
const VkPipelineCreationFeedbackCreateInfo *feedback_info =
vk_find_struct_const(pCreateInfo->pNext,
PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
struct vk_compute_pipeline *pipeline =
vk_pipeline_zalloc(device, &vk_compute_pipeline_ops,
VK_PIPELINE_BIND_POINT_COMPUTE,
pipeline_flags, pAllocator, sizeof(*pipeline));
if (pipeline == NULL) {
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail_stage;
}
pipeline->base.stages = VK_SHADER_STAGE_COMPUTE_BIT;
bool cache_hit;
if (bin_info != NULL && bin_info->binaryCount > 0) {
VK_FROM_HANDLE(vk_pipeline_binary, binary,
bin_info->pPipelineBinaries[0]);
result = vk_pipeline_load_shader_from_binary(device, &stage, binary);
if (result != VK_SUCCESS)
goto fail_pipeline;
} else {
result = vk_pipeline_precompile_shader(device, cache, pipeline_flags,
pCreateInfo->pNext,
&pCreateInfo->stage,
&stage);
if (result != VK_SUCCESS)
goto fail_pipeline;
result = vk_pipeline_compile_compute_stage(device, cache, pipeline,
pipeline_layout, &stage,
&cache_hit);
if (result != VK_SUCCESS)
goto fail_pipeline;
}
pipeline->stage = vk_pipeline_stage_clone(&stage);
const int64_t pipeline_end = os_time_get_nano();
if (feedback_info != NULL) {
VkPipelineCreationFeedback pipeline_feedback = {
.flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
.duration = pipeline_end - pipeline_start,
};
if (cache_hit && cache != device->mem_cache) {
pipeline_feedback.flags |=
VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
}
*feedback_info->pPipelineCreationFeedback = pipeline_feedback;
if (feedback_info->pipelineStageCreationFeedbackCount > 0) {
feedback_info->pPipelineStageCreationFeedbacks[0] =
pipeline_feedback;
}
}
vk_pipeline_stage_finish(device, &stage);
*pPipeline = vk_pipeline_to_handle(&pipeline->base);
return VK_SUCCESS;
fail_pipeline:
vk_pipeline_free(device, pAllocator, &pipeline->base);
fail_stage:
vk_pipeline_stage_finish(device, &stage);
return result;
}
VKAPI_ATTR VkResult VKAPI_CALL
vk_common_CreateComputePipelines(VkDevice _device,
VkPipelineCache pipelineCache,
uint32_t createInfoCount,
const VkComputePipelineCreateInfo *pCreateInfos,
const VkAllocationCallbacks *pAllocator,
VkPipeline *pPipelines)
{
VK_FROM_HANDLE(vk_device, device, _device);
VK_FROM_HANDLE(vk_pipeline_cache, cache, pipelineCache);
VkResult first_error_or_success = VK_SUCCESS;
/* Use implicit pipeline cache if there's no cache set */
if (!cache && device->mem_cache)
cache = device->mem_cache;
/* From the Vulkan 1.3.274 spec:
*
* "When attempting to create many pipelines in a single command, it is
* possible that creation may fail for a subset of them. In this case,
* the corresponding elements of pPipelines will be set to
* VK_NULL_HANDLE.
*/
memset(pPipelines, 0, createInfoCount * sizeof(*pPipelines));
unsigned i = 0;
for (; i < createInfoCount; i++) {
VkResult result = vk_create_compute_pipeline(device, cache,
&pCreateInfos[i],
pAllocator,
&pPipelines[i]);
if (result == VK_SUCCESS)
continue;
if (first_error_or_success == VK_SUCCESS)
first_error_or_success = result;
/* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED as it
* is not obvious what error should be report upon 2 different failures.
*/
if (result != VK_PIPELINE_COMPILE_REQUIRED)
return result;
const VkPipelineCreateFlags2KHR flags =
vk_compute_pipeline_create_flags(&pCreateInfos[i]);
if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
return result;
}
return first_error_or_success;
}
void
vk_cmd_unbind_pipelines_for_stages(struct vk_command_buffer *cmd_buffer,
VkShaderStageFlags stages)
{
stages &= cmd_buffer->pipeline_shader_stages;
if (stages & ~VK_SHADER_STAGE_COMPUTE_BIT)
vk_graphics_pipeline_cmd_bind(cmd_buffer, NULL);
if (stages & VK_SHADER_STAGE_COMPUTE_BIT)
vk_compute_pipeline_cmd_bind(cmd_buffer, NULL);
}
struct vk_rt_stage {
bool linked : 1;
bool imported : 1;
struct vk_shader *shader;
};
struct vk_rt_shader_group {
VkRayTracingShaderGroupTypeKHR type;
struct vk_rt_stage stages[3];
uint32_t stage_count;
};
struct vk_rt_pipeline {
struct vk_pipeline base;
uint32_t group_count;
struct vk_rt_shader_group *groups;
uint32_t stage_count;
struct vk_rt_stage *stages;
VkDeviceSize stack_size;
VkDeviceSize scratch_size;
uint32_t ray_queries;
};
static struct vk_rt_stage
vk_rt_stage_ref(struct vk_rt_stage *stage)
{
if (stage->shader)
vk_shader_ref(stage->shader);
return *stage;
}
static void
vk_rt_shader_group_destroy(struct vk_device *device,
struct vk_rt_shader_group *group)
{
for (uint32_t i = 0; i < group->stage_count; i++)
vk_shader_unref(device, group->stages[i].shader);
}
static struct vk_rt_shader_group
vk_rt_shader_group_clone(struct vk_rt_shader_group *other)
{
struct vk_rt_shader_group group = *other;
for (uint32_t i = 0; i < ARRAY_SIZE(other->stages); i++)
group.stages[i] = vk_rt_stage_ref(&other->stages[i]);
return group;
}
static void
vk_rt_pipeline_destroy(struct vk_device *device,
struct vk_pipeline *pipeline,
const VkAllocationCallbacks *pAllocator)
{
struct vk_rt_pipeline *rt_pipeline =
container_of(pipeline, struct vk_rt_pipeline, base);
for (uint32_t i = 0; i < rt_pipeline->group_count; i++)
vk_rt_shader_group_destroy(device, &rt_pipeline->groups[i]);
for (uint32_t i = 0; i < rt_pipeline->stage_count; i++)
vk_shader_unref(device, rt_pipeline->stages[i].shader);
vk_pipeline_free(device, pAllocator, pipeline);
}
static void
vk_rt_pipeline_cmd_bind(struct vk_command_buffer *cmd_buffer,
struct vk_pipeline *pipeline)
{
if (pipeline != NULL) {
struct vk_device *device = cmd_buffer->base.device;
const struct vk_device_shader_ops *ops = device->shader_ops;
struct vk_rt_pipeline *rt_pipeline =
container_of(pipeline, struct vk_rt_pipeline, base);
ops->cmd_set_rt_state(cmd_buffer,
rt_pipeline->scratch_size,
rt_pipeline->ray_queries);
if (rt_pipeline->stack_size > 0)
ops->cmd_set_stack_size(cmd_buffer, rt_pipeline->stack_size);
assert(pipeline->bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR);
cmd_buffer->pipeline_shader_stages |= pipeline->stages;
} else {
cmd_buffer->pipeline_shader_stages &= ~(VK_SHADER_STAGE_RAYGEN_BIT_KHR |
VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
VK_SHADER_STAGE_MISS_BIT_KHR |
VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
VK_SHADER_STAGE_CALLABLE_BIT_KHR);
}
}
static uint32_t
stages_mask(uint32_t stage_count, struct vk_pipeline_stage *stages)
{
uint32_t stage_mask = 0;
for (uint32_t i = 0; i < stage_count; i++)
stage_mask |= BITFIELD_BIT(stages[i].stage);
return stage_mask;
}
static void
hash_rt_parameters(struct mesa_blake3 *blake3_ctx,
VkShaderCreateFlagsEXT shader_flags,
VkPipelineCreateFlags2KHR _rt_flags,
const VkPushConstantRange *push_range,
struct vk_pipeline_layout *pipeline_layout)
{
/* We don't want all the flags to be part of the hash (things like
* VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT in
* particular)
*/
const VkPipelineCreateFlags2KHR rt_flags =
_rt_flags & MESA_VK_PIPELINE_RAY_TRACING_FLAGS;
_mesa_blake3_update(blake3_ctx, &shader_flags, sizeof(shader_flags));
_mesa_blake3_update(blake3_ctx, &rt_flags, sizeof(rt_flags));
for (uint32_t i = 0; i < pipeline_layout->set_count; i++) {
if (pipeline_layout->set_layouts[i] != NULL) {
_mesa_blake3_update(blake3_ctx,
pipeline_layout->set_layouts[i]->blake3,
sizeof(pipeline_layout->set_layouts[i]->blake3));
}
}
if (push_range != NULL)
_mesa_blake3_update(blake3_ctx, push_range, sizeof(*push_range));
}
static void
vk_pipeline_hash_rt_shader(struct vk_device *device,
VkPipelineCreateFlags2KHR pipeline_flags,
struct vk_pipeline_layout *pipeline_layout,
struct vk_pipeline_stage *stage)
{
const VkPushConstantRange *push_range =
get_push_range_for_stage(pipeline_layout, stage->stage);
struct mesa_blake3 blake3_ctx;
_mesa_blake3_init(&blake3_ctx);
_mesa_blake3_update(&blake3_ctx, &stage->stage,
sizeof(stage->stage));
VkShaderCreateFlagsEXT shader_flags =
vk_pipeline_to_shader_flags(pipeline_flags, stage->stage);
hash_rt_parameters(&blake3_ctx, shader_flags,
pipeline_flags,
push_range, pipeline_layout);
_mesa_blake3_update(&blake3_ctx, stage->precomp_key,
sizeof(stage->precomp_key));
_mesa_blake3_final(&blake3_ctx, stage->shader_key);
}
static void
vk_pipeline_rehash_rt_linked_shaders(struct vk_device *device,
VkPipelineCreateFlags2KHR pipeline_flags,
const VkPipelineBinaryInfoKHR *bin_info,
struct vk_pipeline_layout *pipeline_layout,
struct vk_pipeline_stage *stages,
uint32_t stage_count,
VkShaderStageFlags linked_stages)
{
/* Either there is no linking going on, or we must at least have 2 stages
* linked together.
*/
assert(linked_stages == 0 || util_bitcount(linked_stages) >= 2);
for (uint32_t i = 0; i < stage_count; i++) {
/* If this isn't a linked shader stage, there's nothing to do. The
* hash we got from vk_pipeline_hash_rt_shader() is fine.
*/
if (!(mesa_to_vk_shader_stage(stages[i].stage) & linked_stages))
continue;
stages[i].linked = true;
if (bin_info == NULL || bin_info->binaryCount == 0) {
struct mesa_blake3 blake3_ctx;
_mesa_blake3_init(&blake3_ctx);
assert(mesa_shader_stage_is_rt(stages[i].stage));
_mesa_blake3_update(&blake3_ctx, &stages[i].stage,
sizeof(stages[i].stage));
const VkPushConstantRange *push_range =
get_push_range_for_stage(pipeline_layout, stages[i].stage);
VkShaderCreateFlagsEXT shader_flags =
vk_pipeline_to_shader_flags(pipeline_flags, stages[i].stage);
hash_rt_parameters(&blake3_ctx, shader_flags, pipeline_flags,
push_range, pipeline_layout);
/* Tie the shader to all the other shaders we're linking with */
for (uint32_t j = 0; j < stage_count; j++) {
if (mesa_to_vk_shader_stage(stages[j].stage) & linked_stages) {
_mesa_blake3_update(&blake3_ctx, stages[j].precomp_key,
sizeof(stages[j].precomp_key));
}
}
_mesa_blake3_final(&blake3_ctx, stages[i].shader_key);
}
}
}
struct vk_rt_group_compile_info {
VkRayTracingShaderGroupTypeKHR type;
/* Indice of the stages in vk_rt_pipeline_compile_info::stages[] */
uint32_t stage_indices[3];
struct vk_pipeline_stage stages[3];
uint32_t stage_count;
};
struct vk_rt_pipeline_compile_info {
struct vk_pipeline_stage *stages;
uint32_t stage_count;
struct vk_rt_group_compile_info *groups;
uint32_t group_count;
void *data;
};
static void
vk_rt_group_compile_info_finish(struct vk_device *device,
struct vk_rt_group_compile_info *group)
{
for (uint32_t i = 0; i < group->stage_count; i++)
vk_pipeline_stage_finish(device, &group->stages[i]);
}
static struct vk_rt_stage
vk_rt_stage_from_pipeline_stage(struct vk_pipeline_stage *stage)
{
return (struct vk_rt_stage) {
.shader = vk_shader_ref(stage->shader),
.linked = stage->linked,
};
}
static struct vk_pipeline_stage
vk_pipeline_stage_from_rt_stage(struct vk_rt_stage *stage)
{
struct vk_pipeline_stage ret = {
.stage = stage->shader->stage,
.shader = vk_shader_ref(stage->shader),
.linked = stage->linked,
.imported = true,
/* precomp & precomp_key left empty on purpose */
};
assert(sizeof(ret.shader_key) ==
sizeof(stage->shader->pipeline.cache_key));
memcpy(ret.shader_key, stage->shader->pipeline.cache_key,
sizeof(stage->shader->pipeline.cache_key));
return ret;
}
static struct vk_rt_shader_group
vk_rt_shader_group_from_compile_info(struct vk_rt_group_compile_info *group_info)
{
assert(group_info->stage_count > 0);
struct vk_rt_shader_group group = (struct vk_rt_shader_group) {
.type = group_info->type,
.stage_count = group_info->stage_count,
};
for (uint32_t i = 0; i < group_info->stage_count; i++) {
assert(group_info->stages[i].shader != NULL);
group.stages[i] = (struct vk_rt_stage) {
.imported = true,
.linked = group_info->stages[i].linked,
.shader = vk_shader_ref(group_info->stages[i].shader),
};
}
return group;
}
static VkResult
vk_get_rt_pipeline_compile_info(struct vk_rt_pipeline_compile_info *info,
struct vk_device *device,
const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
const VkAllocationCallbacks *pAllocator)
{
VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, pCreateInfo->layout);
const struct vk_device_shader_ops *ops = device->shader_ops;
memset(info, 0, sizeof(*info));
uint32_t libraries_stage_count = 0;
uint32_t libraries_group_count = 0;
const VkPipelineLibraryCreateInfoKHR *libs_info = pCreateInfo->pLibraryInfo;
if (libs_info != NULL) {
for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
VK_FROM_HANDLE(vk_pipeline, lib_pipeline, libs_info->pLibraries[i]);
assert(lib_pipeline->bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR);
assert(lib_pipeline->flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR);
struct vk_rt_pipeline *lib_rt_pipeline =
container_of(lib_pipeline, struct vk_rt_pipeline, base);
libraries_stage_count += lib_rt_pipeline->stage_count;
libraries_group_count += lib_rt_pipeline->group_count;
}
}
info->stage_count = libraries_stage_count + pCreateInfo->stageCount;
info->group_count = libraries_group_count + pCreateInfo->groupCount;
VK_MULTIALLOC(ma);
VK_MULTIALLOC_DECL(&ma, struct vk_pipeline_stage, stages, info->stage_count);
VK_MULTIALLOC_DECL(&ma, struct vk_rt_group_compile_info, groups, info->group_count);
info->data = vk_multialloc_zalloc2(&ma, &device->alloc, pAllocator,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
if (info->data == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
info->stages = stages;
info->groups = groups;
const VkPipelineCreateFlags2KHR pipeline_flags =
vk_rt_pipeline_create_flags(pCreateInfo);
const VkPipelineBinaryInfoKHR *bin_info =
vk_find_struct_const(pCreateInfo->pNext,
PIPELINE_BINARY_INFO_KHR);
for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
const VkPipelineShaderStageCreateInfo *stage_info =
&pCreateInfo->pStages[i];
info->stages[i] = (struct vk_pipeline_stage) {
.stage = vk_to_mesa_shader_stage(stage_info->stage),
};
if (bin_info == NULL || bin_info->binaryCount == 0) {
vk_pipeline_hash_precomp_shader_stage(device, pipeline_flags,
pCreateInfo->pNext, stage_info,
&info->stages[i]);
vk_pipeline_hash_rt_shader(device, pipeline_flags, pipeline_layout,
&info->stages[i]);
}
}
for (uint32_t i = 0; i < pCreateInfo->groupCount; i++) {
const VkRayTracingShaderGroupCreateInfoKHR *group_info =
&pCreateInfo->pGroups[i];
struct vk_rt_group_compile_info *group = &info->groups[i];
group->stage_count = 0;
switch (group_info->type) {
case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
assert(group_info->generalShader < info->stage_count);
group->stage_indices[group->stage_count++] = group_info->generalShader;
break;
case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
if (group_info->anyHitShader < info->stage_count)
group->stage_indices[group->stage_count++] = group_info->anyHitShader;
if (group_info->closestHitShader < info->stage_count)
group->stage_indices[group->stage_count++] = group_info->closestHitShader;
break;
case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR:
if (group_info->closestHitShader < info->stage_count)
group->stage_indices[group->stage_count++] = group_info->closestHitShader;
if (group_info->anyHitShader < info->stage_count)
group->stage_indices[group->stage_count++] = group_info->anyHitShader;
assert(group_info->intersectionShader < info->stage_count);
group->stage_indices[group->stage_count++] = group_info->intersectionShader;
break;
default:
UNREACHABLE("Invalid shader group");
}
VkShaderStageFlags group_all_stages = 0;
for (uint32_t s = 0; s < group->stage_count; s++) {
group->stages[s] = vk_pipeline_stage_clone(
&info->stages[group->stage_indices[s]]);
group_all_stages |= mesa_to_vk_shader_stage(group->stages[s].stage);
}
VkShaderStageFlags group_linked_stages =
ops->get_rt_group_linking != NULL ?
ops->get_rt_group_linking(device->physical, group_all_stages) : 0;
/* Compute shader hashes for the linked stages */
vk_pipeline_rehash_rt_linked_shaders(device, pipeline_flags, bin_info,
pipeline_layout,
group->stages, group->stage_count,
group_linked_stages);
/* It is possible to have a group with 0 shaders. */
assert(group->stage_count <= 3);
}
if (libs_info != NULL) {
uint32_t stage_index = pCreateInfo->stageCount;
uint32_t group_index = pCreateInfo->groupCount;
for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
VK_FROM_HANDLE(vk_pipeline, lib_pipeline, libs_info->pLibraries[i]);
struct vk_rt_pipeline *lib_rt_pipeline =
container_of(lib_pipeline, struct vk_rt_pipeline, base);
for (uint32_t s = 0; s < lib_rt_pipeline->stage_count; s++) {
info->stages[stage_index++] =
vk_pipeline_stage_from_rt_stage(&lib_rt_pipeline->stages[s]);
assert(stage_index <= info->stage_count);
}
for (uint32_t g = 0; g < lib_rt_pipeline->group_count; g++) {
struct vk_rt_shader_group *lib_rt_group = &lib_rt_pipeline->groups[g];
struct vk_rt_group_compile_info *group = &info->groups[group_index++];
*group = (struct vk_rt_group_compile_info) {
.type = lib_rt_group->type,
.stage_count = lib_rt_group->stage_count,
};
for (uint32_t s = 0; s < group->stage_count; s++) {
group->stages[s] =
vk_pipeline_stage_from_rt_stage(&lib_rt_group->stages[s]);
}
}
assert(group_index <= info->group_count);
}
}
return VK_SUCCESS;
}
static void
vk_release_rt_pipeline_compile_info(struct vk_rt_pipeline_compile_info *info,
struct vk_device *device,
const VkAllocationCallbacks *pAllocator)
{
for (uint32_t i = 0; i < info->group_count; i++)
vk_rt_group_compile_info_finish(device, &info->groups[i]);
for (uint32_t i = 0; i < info->stage_count; i++)
vk_pipeline_stage_finish(device, &info->stages[i]);
vk_free2(&device->alloc, pAllocator, info->data);
}
static VkResult
vk_pipeline_compile_rt_shader(struct vk_device *device,
struct vk_pipeline_cache *cache,
VkPipelineCreateFlags2KHR pipeline_flags,
struct vk_pipeline_layout *pipeline_layout,
struct vk_pipeline_stage *stage,
VkPipelineCreationFeedback *stage_feedback)
{
const struct vk_device_shader_ops *ops = device->shader_ops;
int64_t stage_start = os_time_get_nano();
if (cache != NULL) {
bool cache_hit = false;
struct vk_pipeline_cache_object *cache_obj =
vk_pipeline_cache_lookup_object(cache, stage->shader_key,
sizeof(stage->shader_key),
&pipeline_shader_cache_ops,
&cache_hit);
if (cache_obj != NULL) {
stage->shader = vk_shader_from_cache_obj(cache_obj);
if (stage_feedback != NULL) {
const int64_t stage_end = os_time_get_nano();
stage_feedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
if (cache_hit) {
stage_feedback->flags |=
VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
}
stage_feedback->duration = stage_end - stage_start;
}
return VK_SUCCESS;
}
}
if (pipeline_flags &
VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR)
return VK_PIPELINE_COMPILE_REQUIRED;
const struct nir_shader_compiler_options *nir_options =
ops->get_nir_options(device->physical, stage->stage, &stage->precomp->rs);
nir_shader *nir = vk_pipeline_precomp_shader_get_nir(stage->precomp,
nir_options);
if (nir == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
const VkPushConstantRange *push_range =
get_push_range_for_stage(pipeline_layout, stage->stage);
VkShaderCreateFlagsEXT shader_flags =
vk_pipeline_to_shader_flags(pipeline_flags, stage->stage);
/* vk_device_shader_ops::compile() consumes the NIR regardless of
* whether or not it succeeds and only generates shaders on success.
* Once compile() returns, we own the shaders but not the NIR in
* infos.
*/
struct vk_shader_compile_info compile_info = {
.stage = stage->stage,
.flags = shader_flags,
.rt_flags = pipeline_flags & MESA_VK_PIPELINE_RAY_TRACING_FLAGS,
.next_stage_mask = 0,
.nir = nir,
.robustness = &stage->precomp->rs,
.set_layout_count = pipeline_layout->set_count,
.set_layouts = pipeline_layout->set_layouts,
.push_constant_range_count = push_range != NULL,
.push_constant_ranges = push_range != NULL ? push_range : NULL,
};
struct vk_shader *shader;
VkResult result = vk_compile_shaders(device, 1, &compile_info,
NULL, &device->enabled_features,
&device->alloc, &shader);
if (result != VK_SUCCESS)
return result;
vk_shader_init_cache_obj(device, shader,
&stage->shader_key,
sizeof(stage->shader_key));
struct vk_pipeline_cache_object *cache_obj = &shader->pipeline.cache_obj;
if (cache != NULL)
cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);
stage->shader = vk_shader_from_cache_obj(cache_obj);
if (stage_feedback != NULL) {
const int64_t stage_end = os_time_get_nano();
stage_feedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
stage_feedback->duration = stage_end - stage_start;
}
return VK_SUCCESS;
}
static VkResult
vk_pipeline_compile_rt_shader_group(struct vk_device *device,
struct vk_pipeline_cache *cache,
VkPipelineCreateFlags2KHR pipeline_flags,
struct vk_pipeline_layout *pipeline_layout,
uint32_t stage_count,
struct vk_pipeline_stage *stages,
bool *all_cache_hit)
{
const struct vk_device_shader_ops *ops = device->shader_ops;
assert(stage_count > 1 && stage_count <= 3);
if (cache != NULL) {
*all_cache_hit = true;
bool all_shaders_found = true;
for (uint32_t i = 0; i < stage_count; i++) {
bool cache_hit = false;
struct vk_pipeline_cache_object *cache_obj =
vk_pipeline_cache_lookup_object(cache,
stages[i].shader_key,
sizeof(stages[i].shader_key),
&pipeline_shader_cache_ops,
&cache_hit);
if (cache_obj != NULL) {
assert(stages[i].shader == NULL);
stages[i].shader = vk_shader_from_cache_obj(cache_obj);
} else {
all_shaders_found = false;
}
if (cache_obj == NULL && !cache_hit)
*all_cache_hit = false;
}
if (all_shaders_found)
return VK_SUCCESS;
} else {
*all_cache_hit = false;
}
/* Unref all the shaders found in the cache, we're going to do a compile
* anyway.
*/
for (uint32_t i = 0; i < stage_count; i++) {
if (stages[i].shader) {
vk_shader_unref(device, stages[i].shader);
stages[i].shader = NULL;
}
}
if (pipeline_flags &
VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR)
return VK_PIPELINE_COMPILE_REQUIRED;
struct vk_shader_compile_info compile_info[3] = { 0 };
for (uint32_t i = 0; i < stage_count; i++) {
mesa_shader_stage stage = stages[i].stage;
struct vk_pipeline_precomp_shader *precomp = stages[i].precomp;
assert(precomp != NULL);
const VkPushConstantRange *push_range =
get_push_range_for_stage(pipeline_layout, stages[i].stage);
const struct nir_shader_compiler_options *nir_options =
ops->get_nir_options(device->physical, stage, &precomp->rs);
compile_info[i] = (struct vk_shader_compile_info) {
.stage = stages[i].stage,
.flags = vk_pipeline_to_shader_flags(pipeline_flags,
stages[i].stage),
.rt_flags = pipeline_flags & MESA_VK_PIPELINE_RAY_TRACING_FLAGS,
.next_stage_mask = 0,
.nir = vk_pipeline_precomp_shader_get_nir(precomp, nir_options),
.robustness = &precomp->rs,
.set_layout_count = pipeline_layout->set_count,
.set_layouts = pipeline_layout->set_layouts,
.push_constant_range_count = push_range != NULL,
.push_constant_ranges = push_range != NULL ? push_range : NULL,
};
if (compile_info[i].nir == NULL) {
for (uint32_t j = 0; j < i; j++)
ralloc_free(compile_info[i].nir);
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
}
struct vk_shader *shaders[3];
VkResult result = vk_compile_shaders(device, stage_count, compile_info,
NULL, &device->enabled_features,
&device->alloc, shaders);
if (result != VK_SUCCESS)
return result;
for (uint32_t i = 0; i < stage_count; i++) {
vk_shader_init_cache_obj(device, shaders[i],
&stages[i].shader_key,
sizeof(stages[i].shader_key));
struct vk_pipeline_cache_object *cache_obj =
&shaders[i]->pipeline.cache_obj;
if (cache != NULL)
cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);
stages[i].shader = vk_shader_from_cache_obj(cache_obj);
}
return VK_SUCCESS;
}
static VkResult
vk_rt_pipeline_get_executable_properties(
struct vk_device *device,
struct vk_pipeline *pipeline,
uint32_t *executable_count,
VkPipelineExecutablePropertiesKHR *properties)
{
struct vk_rt_pipeline *rt_pipeline =
container_of(pipeline, struct vk_rt_pipeline, base);
VkResult result;
if (properties == NULL) {
*executable_count = 0;
for (uint32_t i = 0; i < rt_pipeline->stage_count; i++) {
struct vk_shader *shader = rt_pipeline->stages[i].shader;
uint32_t shader_exec_count = 0;
result = shader->ops->get_executable_properties(device, shader,
&shader_exec_count,
NULL);
assert(shader_exec_count == 1);
assert(result == VK_SUCCESS);
*executable_count += shader_exec_count;
}
} else {
uint32_t arr_len = *executable_count;
*executable_count = 0;
for (uint32_t i = 0; i < rt_pipeline->stage_count; i++) {
struct vk_shader *shader = rt_pipeline->stages[i].shader;
uint32_t shader_exec_count = arr_len - *executable_count;
result = shader->ops->get_executable_properties(device, shader,
&shader_exec_count,
&properties[*executable_count]);
if (result != VK_SUCCESS)
return result;
assert(shader_exec_count == 1);
*executable_count += shader_exec_count;
}
}
return VK_SUCCESS;
}
static VkResult
vk_rt_pipeline_get_executable_statistics(
struct vk_device *device,
struct vk_pipeline *pipeline,
uint32_t executable_index,
uint32_t *statistic_count,
VkPipelineExecutableStatisticKHR *statistics)
{
struct vk_rt_pipeline *rt_pipeline =
container_of(pipeline, struct vk_rt_pipeline, base);
assert(executable_index < rt_pipeline->stage_count);
struct vk_shader *shader = rt_pipeline->stages[executable_index].shader;
return shader->ops->get_executable_statistics(device, shader, 0,
statistic_count,
statistics);
}
static VkResult
vk_rt_pipeline_get_internal_representations(
struct vk_device *device,
struct vk_pipeline *pipeline,
uint32_t executable_index,
uint32_t *internal_representation_count,
VkPipelineExecutableInternalRepresentationKHR* internal_representations)
{
struct vk_rt_pipeline *rt_pipeline =
container_of(pipeline, struct vk_rt_pipeline, base);
assert(executable_index < rt_pipeline->stage_count);
struct vk_shader *shader = rt_pipeline->stages[executable_index].shader;
return shader->ops->get_executable_internal_representations(
device, shader, 0,
internal_representation_count, internal_representations);
}
static struct vk_shader *
vk_rt_pipeline_get_shader(struct vk_pipeline *pipeline,
mesa_shader_stage stage)
{
UNREACHABLE("Invalid operation");
}
static const struct vk_pipeline_ops vk_rt_pipeline_ops = {
.destroy = vk_rt_pipeline_destroy,
.get_executable_statistics = vk_rt_pipeline_get_executable_statistics,
.get_executable_properties = vk_rt_pipeline_get_executable_properties,
.get_internal_representations = vk_rt_pipeline_get_internal_representations,
.cmd_bind = vk_rt_pipeline_cmd_bind,
.get_shader = vk_rt_pipeline_get_shader,
};
static bool
is_rt_stack_size_dynamic(const VkRayTracingPipelineCreateInfoKHR *info)
{
if (info->pDynamicState == NULL)
return false;
for (unsigned i = 0; i < info->pDynamicState->dynamicStateCount; i++) {
if (info->pDynamicState->pDynamicStates[i] ==
VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR)
return true;
}
return false;
}
static int
cmp_vk_rt_pipeline_stages(const void *_a, const void *_b)
{
const struct vk_rt_stage *a = _a, *b = _b;
return vk_shader_cmp_rt_stages(a->shader->stage, b->shader->stage);
}
static VkResult
vk_create_rt_pipeline(struct vk_device *device,
struct vk_pipeline_cache *cache,
const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkPipeline *pPipeline)
{
VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, pCreateInfo->layout);
int64_t pipeline_start = os_time_get_nano();
VkResult result;
struct vk_rt_pipeline_compile_info compile_info;
result = vk_get_rt_pipeline_compile_info(&compile_info, device,
pCreateInfo, pAllocator);
if (result != VK_SUCCESS)
return result;
const VkPipelineCreateFlags2KHR pipeline_flags =
vk_rt_pipeline_create_flags(pCreateInfo);
const VkPipelineBinaryInfoKHR *bin_info =
vk_find_struct_const(pCreateInfo->pNext,
PIPELINE_BINARY_INFO_KHR);
const VkPipelineCreationFeedbackCreateInfo *feedback_info =
vk_find_struct_const(pCreateInfo->pNext,
PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
VK_MULTIALLOC(ma);
VK_MULTIALLOC_DECL(&ma, struct vk_rt_pipeline, _pipeline, 1);
VK_MULTIALLOC_DECL(&ma, struct vk_rt_stage, pipeline_stages,
compile_info.stage_count);
VK_MULTIALLOC_DECL(&ma, struct vk_rt_shader_group, pipeline_groups,
compile_info.group_count);
struct vk_rt_pipeline *pipeline =
vk_pipeline_multizalloc(device, &ma, &vk_rt_pipeline_ops,
VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR,
pipeline_flags, pAllocator);
if (pipeline == NULL) {
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail_pipeline;
}
pipeline->stages = pipeline_stages;
pipeline->groups = pipeline_groups;
bool all_cache_hit = true;
uint32_t stack_max[MESA_SHADER_KERNEL] = { 0 };
uint32_t binary_index = 0;
/* Load/Compile individual shaders */
for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
const VkPipelineShaderStageCreateInfo *stage_info =
&pCreateInfo->pStages[i];
pipeline->base.stages |= pCreateInfo->pStages[i].stage;
VkPipelineCreationFeedback feedback = { 0 };
if (bin_info != NULL && bin_info->binaryCount > 0) {
VK_FROM_HANDLE(vk_pipeline_binary, binary,
bin_info->pPipelineBinaries[binary_index++]);
result = vk_pipeline_load_shader_from_binary(device,
&compile_info.stages[i],
binary);
if (result != VK_SUCCESS)
goto fail_stages_compile;
} else {
result = vk_pipeline_precompile_shader(device, cache, pipeline_flags,
pCreateInfo->pNext,
stage_info, &compile_info.stages[i]);
if (result != VK_SUCCESS)
goto fail_stages_compile;
assert(compile_info.stages[i].precomp != NULL);
result = vk_pipeline_compile_rt_shader(device, cache,
pipeline_flags,
pipeline_layout,
&compile_info.stages[i],
&feedback);
if ((feedback.flags &
VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT) == 0 &&
(pipeline->base.flags &
VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR)) {
result = VK_PIPELINE_COMPILE_REQUIRED;
goto fail_stages_compile;
}
}
if (result != VK_SUCCESS)
goto fail_stages_compile;
assert(compile_info.stages[i].shader);
/* No need to take a reference, either the pipeline creation succeeds
* and the ownership is transfered from from stages[] to the pipeline or
* it fails and all stages[] elements are unref.
*/
pipeline->stages[pipeline->stage_count++] = (struct vk_rt_stage) {
.shader = vk_shader_ref(compile_info.stages[i].shader),
};
if (feedback_info &&
feedback_info->pipelineStageCreationFeedbackCount > 0) {
feedback_info->pPipelineStageCreationFeedbacks[i] = feedback;
all_cache_hit &= !!(feedback.flags &
VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT);
}
}
/* Create/Compile groups */
for (uint32_t i = 0; i < pCreateInfo->groupCount; i++) {
const VkRayTracingShaderGroupCreateInfoKHR *group_info =
&pCreateInfo->pGroups[i];
struct vk_rt_shader_group *group = &pipeline->groups[i];
group->type = group_info->type;
struct vk_pipeline_stage linked_stages[3];
uint32_t linked_stage_count = 0;
for (uint32_t s = 0; s < compile_info.groups[i].stage_count; s++) {
if (compile_info.groups[i].stages[s].linked) {
linked_stages[linked_stage_count] =
compile_info.groups[i].stages[s];
linked_stages[linked_stage_count].precomp =
compile_info.stages[compile_info.groups[i].stage_indices[s]].precomp;
linked_stage_count++;
} else {
compile_info.groups[i].stages[s] = vk_pipeline_stage_clone(
&compile_info.stages[compile_info.groups[i].stage_indices[s]]);
}
}
if (linked_stage_count > 0) {
assert(linked_stage_count > 1);
if (bin_info != NULL && bin_info->binaryCount > 0) {
for (uint32_t s = 0; s < linked_stage_count; s++) {
VK_FROM_HANDLE(vk_pipeline_binary, binary,
bin_info->pPipelineBinaries[binary_index++]);
result = vk_pipeline_load_shader_from_binary(device,
&linked_stages[s],
binary);
if (result != VK_SUCCESS)
goto fail_stages_compile;
}
} else {
bool cache_hit;
result = vk_pipeline_compile_rt_shader_group(device, cache,
pipeline_flags,
pipeline_layout,
linked_stage_count,
linked_stages,
&cache_hit);
if (result != VK_SUCCESS)
goto fail_stages_compile;
all_cache_hit &= cache_hit;
}
/* Discard the precomps */
for (uint32_t s = 0; s < linked_stage_count; s++) {
linked_stages[s].precomp = NULL;
}
}
/* Build the final group with either linked stages or standaline stages.
*/
for (uint32_t s = 0; s < compile_info.groups[i].stage_count; s++) {
if (!compile_info.groups[i].stages[s].linked) {
assert(compile_info.groups[i].stages[s].shader != NULL);
group->stages[s] = (struct vk_rt_stage) {
.shader = vk_shader_ref(compile_info.groups[i].stages[s].shader),
.imported = compile_info.groups[i].stages[s].imported,
};
} else {
for (uint32_t j = 0; j < linked_stage_count; j++) {
if (linked_stages[j].stage == compile_info.groups[i].stages[s].stage) {
group->stages[s] = (struct vk_rt_stage) {
.shader = linked_stages[j].shader,
.linked = true,
};
break;
}
}
}
group->stage_count++;
assert(group->stages[s].shader != NULL);
}
pipeline->group_count++;
}
/* Import library shaders */
for (uint32_t i = pCreateInfo->stageCount; i < compile_info.stage_count; i++) {
pipeline->stages[pipeline->stage_count++] =
vk_rt_stage_from_pipeline_stage(&compile_info.stages[i]);
}
/* Import library groups */
for (uint32_t i = pCreateInfo->groupCount; i < compile_info.group_count; i++) {
pipeline->groups[pipeline->group_count++] =
vk_rt_shader_group_from_compile_info(&compile_info.groups[i]);
}
/* Compute final stats */
for (uint32_t i = 0; i < pipeline->stage_count; i++) {
struct vk_shader *shader = pipeline->stages[i].shader;
stack_max[shader->stage] =
MAX2(shader->stack_size, stack_max[shader->stage]);
pipeline->base.stages |= mesa_to_vk_shader_stage(shader->stage);
pipeline->scratch_size = MAX2(shader->scratch_size, pipeline->scratch_size);
pipeline->ray_queries = MAX2(shader->ray_queries, pipeline->ray_queries);
pipeline->stack_size = MAX2(shader->stack_size, pipeline->stack_size);
}
for (uint32_t g = 0; g < pipeline->group_count; g++) {
const struct vk_rt_shader_group *group = &pipeline->groups[g];
for (uint32_t s = 0; s < group->stage_count; s++) {
struct vk_shader *shader = group->stages[s].shader;
stack_max[shader->stage] =
MAX2(shader->stack_size, stack_max[shader->stage]);
pipeline->base.stages |=
mesa_to_vk_shader_stage(group->stages[s].shader->stage);
pipeline->scratch_size =
MAX2(shader->scratch_size, pipeline->scratch_size);
pipeline->ray_queries = MAX2(shader->ray_queries, pipeline->ray_queries);
pipeline->stack_size = MAX2(shader->stack_size, pipeline->stack_size);
}
}
if (is_rt_stack_size_dynamic(pCreateInfo)) {
pipeline->stack_size = 0; /* 0 means dynamic */
} else {
/* From the Vulkan spec:
*
* "If the stack size is not set explicitly, the stack size for a
* pipeline is:
*
* rayGenStackMax +
* min(1, maxPipelineRayRecursionDepth) ×
* max(closestHitStackMax, missStackMax,
* intersectionStackMax + anyHitStackMax) +
* max(0, maxPipelineRayRecursionDepth-1) ×
* max(closestHitStackMax, missStackMax) +
* 2 × callableStackMax"
*/
pipeline->stack_size = MAX2(
pipeline->stack_size,
stack_max[MESA_SHADER_RAYGEN] +
MIN2(1, pCreateInfo->maxPipelineRayRecursionDepth) *
MAX3(stack_max[MESA_SHADER_CLOSEST_HIT],
stack_max[MESA_SHADER_MISS],
stack_max[MESA_SHADER_INTERSECTION] +
stack_max[MESA_SHADER_ANY_HIT]) +
MAX2(0, (int)pCreateInfo->maxPipelineRayRecursionDepth - 1) *
MAX2(stack_max[MESA_SHADER_CLOSEST_HIT],
stack_max[MESA_SHADER_MISS]) +
2 * stack_max[MESA_SHADER_CALLABLE]);
/* This is an extremely unlikely case but we need to set it to some
* non-zero value so that we don't accidentally think it's dynamic.
*/
if (pipeline->stack_size == 0)
pipeline->stack_size = 1;
}
vk_release_rt_pipeline_compile_info(&compile_info, device, pAllocator);
const int64_t pipeline_end = os_time_get_nano();
if (feedback_info != NULL) {
VkPipelineCreationFeedback pipeline_feedback = {
.flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
.duration = pipeline_end - pipeline_start,
};
if (all_cache_hit && cache != device->mem_cache) {
pipeline_feedback.flags |=
VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
}
*feedback_info->pPipelineCreationFeedback = pipeline_feedback;
}
*pPipeline = vk_pipeline_to_handle(&pipeline->base);
return VK_SUCCESS;
fail_stages_compile:
for (uint32_t i = 0; i < pipeline->group_count; i++)
vk_rt_shader_group_destroy(device, &pipeline->groups[i]);
for (uint32_t i = 0; i < pipeline->stage_count; i++)
vk_shader_unref(device, pipeline->stages[i].shader);
vk_pipeline_free(device, pAllocator, &pipeline->base);
fail_pipeline:
vk_release_rt_pipeline_compile_info(&compile_info, device, pAllocator);
return result;
}
VKAPI_ATTR VkResult VKAPI_CALL
vk_common_CreateRayTracingPipelinesKHR(
VkDevice _device,
VkDeferredOperationKHR deferredOperation,
VkPipelineCache pipelineCache,
uint32_t createInfoCount,
const VkRayTracingPipelineCreateInfoKHR* pCreateInfos,
const VkAllocationCallbacks* pAllocator,
VkPipeline* pPipelines)
{
VK_FROM_HANDLE(vk_device, device, _device);
VK_FROM_HANDLE(vk_pipeline_cache, cache, pipelineCache);
VkResult first_error_or_success = VK_SUCCESS;
/* Use implicit pipeline cache if there's no cache set */
if (!cache && device->mem_cache)
cache = device->mem_cache;
/* From the Vulkan 1.3.274 spec:
*
* "When attempting to create many pipelines in a single command, it is
* possible that creation may fail for a subset of them. In this case,
* the corresponding elements of pPipelines will be set to
* VK_NULL_HANDLE.
*/
memset(pPipelines, 0, createInfoCount * sizeof(*pPipelines));
unsigned i = 0;
for (; i < createInfoCount; i++) {
VkResult result = vk_create_rt_pipeline(device, cache,
&pCreateInfos[i],
pAllocator,
&pPipelines[i]);
if (result == VK_SUCCESS)
continue;
if (first_error_or_success == VK_SUCCESS)
first_error_or_success = result;
/* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED as it
* is not obvious what error should be report upon 2 different failures.
*/
if (result != VK_PIPELINE_COMPILE_REQUIRED)
return result;
const VkPipelineCreateFlags2KHR flags =
vk_rt_pipeline_create_flags(&pCreateInfos[i]);
if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
return result;
}
return first_error_or_success;
}
VKAPI_ATTR VkResult VKAPI_CALL
vk_common_GetRayTracingShaderGroupHandlesKHR(
VkDevice _device,
VkPipeline _pipeline,
uint32_t firstGroup,
uint32_t groupCount,
size_t dataSize,
void* pData)
{
VK_FROM_HANDLE(vk_device, device, _device);
VK_FROM_HANDLE(vk_pipeline, pipeline, _pipeline);
const struct vk_device_shader_ops *ops = device->shader_ops;
assert(pipeline->bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR);
struct vk_rt_pipeline *rt_pipeline =
container_of(pipeline, struct vk_rt_pipeline, base);
assert(dataSize >= device->physical->properties.shaderGroupHandleSize * groupCount);
assert(firstGroup + groupCount <= rt_pipeline->group_count);
for (uint32_t i = 0; i < groupCount; i++) {
struct vk_rt_shader_group *group = &rt_pipeline->groups[firstGroup + i];
struct vk_shader *shaders[3];
for (uint32_t s = 0; s < group->stage_count; s++)
shaders[s] = group->stages[s].shader;
ops->write_rt_shader_group(device, group->type,
(const struct vk_shader **)shaders,
group->stage_count, pData);
pData = (uint8_t *)pData + device->physical->properties.shaderGroupHandleSize;
}
return VK_SUCCESS;
}
VKAPI_ATTR VkResult VKAPI_CALL
vk_common_GetRayTracingCaptureReplayShaderGroupHandlesKHR(
VkDevice _device,
VkPipeline _pipeline,
uint32_t firstGroup,
uint32_t groupCount,
size_t dataSize,
void* pData)
{
VK_FROM_HANDLE(vk_device, device, _device);
VK_FROM_HANDLE(vk_pipeline, pipeline, _pipeline);
const struct vk_device_shader_ops *ops = device->shader_ops;
assert(pipeline->bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR);
struct vk_rt_pipeline *rt_pipeline =
container_of(pipeline, struct vk_rt_pipeline, base);
assert(dataSize >= device->physical->properties.shaderGroupHandleSize * groupCount);
assert(firstGroup + groupCount <= rt_pipeline->group_count);
for (uint32_t i = 0; i < groupCount; i++) {
struct vk_rt_shader_group *group = &rt_pipeline->groups[firstGroup + i];
struct vk_shader *shaders[3] = { 0 };
for (uint32_t s = 0; s < group->stage_count; s++)
shaders[s] = group->stages[s].shader;
ops->write_rt_shader_group_replay_handle(device,
(const struct vk_shader **)shaders,
group->stage_count, pData);
pData = (uint8_t *)pData + device->physical->properties.shaderGroupHandleCaptureReplaySize;
}
return VK_SUCCESS;
}
VKAPI_ATTR VkDeviceSize VKAPI_CALL
vk_common_GetRayTracingShaderGroupStackSizeKHR(
VkDevice device,
VkPipeline _pipeline,
uint32_t _group,
VkShaderGroupShaderKHR groupShader)
{
VK_FROM_HANDLE(vk_pipeline, pipeline, _pipeline);
assert(pipeline->bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR);
struct vk_rt_pipeline *rt_pipeline =
container_of(pipeline, struct vk_rt_pipeline, base);
assert(_group < rt_pipeline->group_count);
struct vk_rt_shader_group *group = &rt_pipeline->groups[_group];
struct vk_shader *shader = NULL;
for (uint32_t i = 0; i < group->stage_count; i++) {
switch (groupShader) {
case VK_SHADER_GROUP_SHADER_GENERAL_KHR:
shader = (group->stages[i].shader->stage == MESA_SHADER_RAYGEN ||
group->stages[i].shader->stage == MESA_SHADER_CALLABLE ||
group->stages[i].shader->stage == MESA_SHADER_MISS) ?
group->stages[i].shader : NULL;
break;
case VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR:
shader = group->stages[i].shader->stage == MESA_SHADER_CLOSEST_HIT ?
group->stages[i].shader : NULL;
break;
case VK_SHADER_GROUP_SHADER_ANY_HIT_KHR:
shader = group->stages[i].shader->stage == MESA_SHADER_ANY_HIT ?
group->stages[i].shader : NULL;
break;
case VK_SHADER_GROUP_SHADER_INTERSECTION_KHR:
shader = group->stages[i].shader->stage == MESA_SHADER_INTERSECTION ?
group->stages[i].shader : NULL;
break;
default:
UNREACHABLE("Invalid VkShaderGroupShader enum");
}
if (shader != NULL)
break;
}
return shader ? shader->stack_size : 0;
}
VKAPI_ATTR void VKAPI_CALL
vk_common_CmdSetRayTracingPipelineStackSizeKHR(
VkCommandBuffer commandBuffer,
uint32_t pipelineStackSize)
{
VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer);
struct vk_device *device = cmd_buffer->base.device;
const struct vk_device_shader_ops *ops = device->shader_ops;
ops->cmd_set_stack_size(cmd_buffer, pipelineStackSize);
}
static VkResult
vk_create_pipeline_binary(struct vk_device *device,
const void *key, size_t key_size,
const void *data, size_t data_size,
const VkAllocationCallbacks *alloc,
VkPipelineBinaryKHR *out_binary_h)
{
struct vk_pipeline_binary *binary =
vk_object_alloc(device, alloc, sizeof(*binary) + data_size,
VK_OBJECT_TYPE_PIPELINE_BINARY_KHR);
if (binary == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
assert(key_size == sizeof(binary->key));
memcpy(binary->key, key, key_size);
binary->size = data_size;
memcpy(binary->data, data, data_size);
*out_binary_h = vk_pipeline_binary_to_handle(binary);
return VK_SUCCESS;
}
static VkResult
vk_create_pipeline_binary_from_precomp(struct vk_device *device,
struct vk_pipeline_precomp_shader *precomp,
const VkAllocationCallbacks *alloc,
VkPipelineBinaryKHR *out_binary_h)
{
VkResult result = VK_SUCCESS;
struct blob blob;
blob_init(&blob);
if (!vk_pipeline_precomp_shader_serialize(&precomp->cache_obj, &blob))
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
if (result == VK_SUCCESS) {
result = vk_create_pipeline_binary(device,
precomp->cache_key,
sizeof(precomp->cache_key),
blob.data, blob.size,
alloc, out_binary_h);
}
blob_finish(&blob);
return result;
}
static VkResult
vk_create_pipeline_binary_from_shader(struct vk_device *device,
struct vk_shader *shader,
const VkAllocationCallbacks *alloc,
VkPipelineBinaryKHR *out_binary_h)
{
VkResult result = VK_SUCCESS;
struct blob blob;
blob_init(&blob);
if (!shader->ops->serialize(device, shader, &blob))
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
if (result == VK_SUCCESS) {
result = vk_create_pipeline_binary(device,
shader->pipeline.cache_key,
sizeof(shader->pipeline.cache_key),
blob.data, blob.size,
alloc, out_binary_h);
}
blob_finish(&blob);
return result;
}
static VkResult
vk_lookup_create_precomp_binary(struct vk_device *device,
struct vk_pipeline_cache *cache,
const void *key, uint32_t key_size,
const VkAllocationCallbacks *alloc,
VkPipelineBinaryKHR *out_binary_h)
{
struct vk_pipeline_cache_object *cache_obj =
vk_pipeline_cache_lookup_object(cache, key, key_size,
&pipeline_precomp_shader_cache_ops,
NULL);
if (cache_obj == NULL)
return VK_PIPELINE_BINARY_MISSING_KHR;
struct vk_pipeline_precomp_shader *precomp =
vk_pipeline_precomp_shader_from_cache_obj(cache_obj);
VkResult result = vk_create_pipeline_binary_from_precomp(
device, precomp, alloc, out_binary_h);
vk_pipeline_precomp_shader_unref(device, precomp);
return result;
}
static VkResult
vk_lookup_create_shader_binary(struct vk_device *device,
struct vk_pipeline_cache *cache,
const void *key, uint32_t key_size,
const VkAllocationCallbacks *alloc,
VkPipelineBinaryKHR *out_binary_h)
{
struct vk_pipeline_cache_object *cache_obj =
vk_pipeline_cache_lookup_object(cache, key, key_size,
&pipeline_shader_cache_ops,
NULL);
if (cache_obj == NULL)
return VK_PIPELINE_BINARY_MISSING_KHR;
struct vk_shader *shader = vk_shader_from_cache_obj(cache_obj);
VkResult result = vk_create_pipeline_binary_from_shader(
device, shader, alloc, out_binary_h);
vk_shader_unref(device, shader);
return result;
}
VKAPI_ATTR VkResult VKAPI_CALL
vk_common_CreatePipelineBinariesKHR(
VkDevice _device,
const VkPipelineBinaryCreateInfoKHR* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
VkPipelineBinaryHandlesInfoKHR* pBinaries)
{
VK_FROM_HANDLE(vk_device, device, _device);
VK_FROM_HANDLE(vk_pipeline, pipeline, pCreateInfo->pipeline);
VK_OUTARRAY_MAKE_TYPED(VkPipelineBinaryKHR, out,
pBinaries->pPipelineBinaries,
&pBinaries->pipelineBinaryCount);
VkResult success_or_first_fail = VK_SUCCESS;
/* VkPipelineBinaryCreateInfoKHR:
*
* "When pPipelineCreateInfo is not NULL, an implementation will attempt
* to retrieve pipeline binary data from an internal cache external to
* the application if pipelineBinaryInternalCache is VK_TRUE.
* Applications can use this to determine if a pipeline can be created
* without compilation. If the implementation fails to create a
* pipeline binary due to missing an internal cache entry,
* VK_PIPELINE_BINARY_MISSING_KHR is returned. If creation succeeds, the
* resulting binary can be used to create a pipeline.
* VK_PIPELINE_BINARY_MISSING_KHR may be returned for any reason in this
* situation, even if creating a pipeline binary with the same
* parameters that succeeded earlier."
*/
if (pCreateInfo->pPipelineCreateInfo &&
device->physical->properties.pipelineBinaryInternalCache) {
assert(device->mem_cache != NULL);
struct vk_pipeline_cache *cache = device->mem_cache;
const VkBaseInStructure *next = pCreateInfo->pPipelineCreateInfo->pNext;
switch (next->sType) {
case VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO: {
struct vk_graphics_pipeline_state state_tmp;
struct vk_graphics_pipeline_all_state all_state_tmp;
memset(&state_tmp, 0, sizeof(state_tmp));
struct vk_graphics_pipeline_compile_info info;
vk_get_graphics_pipeline_compile_info(
&info, device, &state_tmp, &all_state_tmp,
pCreateInfo->pPipelineCreateInfo->pNext);
for (uint32_t i = 0; i < info.stage_count; i++) {
if (info.stages[i].imported)
continue;
if (info.retain_precomp) {
vk_outarray_append_typed(VkPipelineBinaryKHR, &out, binary) {
VkResult result = vk_lookup_create_precomp_binary(
device, cache,
info.stages[i].precomp_key,
sizeof(info.stages[i].precomp_key),
pAllocator, binary);
if (result != VK_SUCCESS) {
*binary = VK_NULL_HANDLE;
if (success_or_first_fail == VK_SUCCESS)
success_or_first_fail = result;
}
}
}
vk_outarray_append_typed(VkPipelineBinaryKHR, &out, binary) {
VkResult result = vk_lookup_create_shader_binary(
device, cache,
info.stages[i].shader_key,
sizeof(info.stages[i].shader_key),
pAllocator, binary);
if (result != VK_SUCCESS) {
*binary = VK_NULL_HANDLE;
if (success_or_first_fail == VK_SUCCESS)
success_or_first_fail = result;
}
}
}
vk_release_graphics_pipeline_compile_info(&info, device, pAllocator);
break;
}
case VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO: {
vk_outarray_append_typed(VkPipelineBinaryKHR, &out, binary) {
struct vk_pipeline_stage info;
vk_get_compute_pipeline_compile_info(
&info, device, pCreateInfo->pPipelineCreateInfo->pNext);
VkResult result = vk_lookup_create_shader_binary(
device, cache,
info.shader_key, sizeof(info.shader_key),
pAllocator, binary);
if (result != VK_SUCCESS) {
*binary = VK_NULL_HANDLE;
if (success_or_first_fail == VK_SUCCESS)
success_or_first_fail = result;
}
vk_pipeline_stage_finish(device, &info);
}
break;
}
case VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR: {
struct vk_rt_pipeline_compile_info info;
VkResult result = vk_get_rt_pipeline_compile_info(
&info, device, pCreateInfo->pPipelineCreateInfo->pNext, pAllocator);
if (result != VK_SUCCESS)
return result;
for (uint32_t i = 0; i < info.stage_count; i++) {
if (info.stages[i].imported)
continue;
vk_outarray_append_typed(VkPipelineBinaryKHR, &out, binary) {
result = vk_lookup_create_shader_binary(
device, cache,
info.stages[i].shader_key, sizeof(info.stages[i].shader_key),
pAllocator, binary);
if (result != VK_SUCCESS) {
*binary = VK_NULL_HANDLE;
if (success_or_first_fail == VK_SUCCESS)
success_or_first_fail = result;
}
}
}
for (uint32_t i = 0; i < info.group_count; i++) {
for (uint32_t s = 0; s < info.groups[i].stage_count; s++) {
if (!info.groups[i].stages[s].linked)
continue;
vk_outarray_append_typed(VkPipelineBinaryKHR, &out, binary) {
result = vk_lookup_create_shader_binary(
device, cache,
info.groups[i].stages[s].shader_key,
sizeof(info.groups[i].stages[s].shader_key),
pAllocator, binary);
if (result != VK_SUCCESS) {
*binary = VK_NULL_HANDLE;
if (success_or_first_fail == VK_SUCCESS)
success_or_first_fail = result;
}
}
}
}
vk_release_rt_pipeline_compile_info(&info, device, pAllocator);
break;
}
default:
UNREACHABLE("Unsupported pNext");
}
} else if (pipeline != NULL) {
switch (pipeline->bind_point) {
case VK_PIPELINE_BIND_POINT_GRAPHICS: {
struct vk_graphics_pipeline *gfx_pipeline =
container_of(pipeline, struct vk_graphics_pipeline, base);
for (uint32_t i = 0; i < gfx_pipeline->stage_count; i++) {
if (gfx_pipeline->stages[i].imported)
continue;
if (gfx_pipeline->stages[i].precomp) {
vk_outarray_append_typed(VkPipelineBinaryKHR, &out, binary) {
VkResult result = vk_create_pipeline_binary_from_precomp(
device, gfx_pipeline->stages[i].precomp,
pAllocator, binary);
if (result != VK_SUCCESS) {
*binary = VK_NULL_HANDLE;
if (success_or_first_fail == VK_SUCCESS)
success_or_first_fail = result;
}
}
}
vk_outarray_append_typed(VkPipelineBinaryKHR, &out, binary) {
VkResult result = vk_create_pipeline_binary_from_shader(
device, gfx_pipeline->stages[i].shader,
pAllocator, binary);
if (result != VK_SUCCESS) {
*binary = VK_NULL_HANDLE;
if (success_or_first_fail == VK_SUCCESS)
success_or_first_fail = result;
}
}
}
break;
}
case VK_PIPELINE_BIND_POINT_COMPUTE: {
struct vk_compute_pipeline *cs_pipeline =
container_of(pipeline, struct vk_compute_pipeline, base);
vk_outarray_append_typed(VkPipelineBinaryKHR, &out, binary) {
VkResult result = vk_create_pipeline_binary_from_shader(
device, cs_pipeline->stage.shader,
pAllocator, binary);
if (result != VK_SUCCESS) {
*binary = VK_NULL_HANDLE;
if (success_or_first_fail == VK_SUCCESS)
success_or_first_fail = result;
}
}
break;
}
case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: {
struct vk_rt_pipeline *rt_pipeline =
container_of(pipeline, struct vk_rt_pipeline, base);
for (uint32_t i = 0; i < rt_pipeline->stage_count; i++) {
if (rt_pipeline->stages[i].imported)
continue;
vk_outarray_append_typed(VkPipelineBinaryKHR, &out, binary) {
VkResult result = vk_create_pipeline_binary_from_shader(
device, rt_pipeline->stages[i].shader,
pAllocator, binary);
if (result != VK_SUCCESS) {
*binary = VK_NULL_HANDLE;
if (success_or_first_fail == VK_SUCCESS)
success_or_first_fail = result;
}
}
}
for (uint32_t i = 0; i < rt_pipeline->group_count; i++) {
for (uint32_t s = 0; s < rt_pipeline->groups[i].stage_count; s++) {
if (!rt_pipeline->groups[i].stages[s].linked)
continue;
vk_outarray_append_typed(VkPipelineBinaryKHR, &out, binary) {
VkResult result = vk_create_pipeline_binary_from_shader(
device, rt_pipeline->groups[i].stages[s].shader,
pAllocator, binary);
if (result != VK_SUCCESS) {
*binary = VK_NULL_HANDLE;
if (success_or_first_fail == VK_SUCCESS)
success_or_first_fail = result;
}
}
}
}
break;
}
default:
UNREACHABLE("Unsupported pipeline");
}
} else {
assert(pCreateInfo->pKeysAndDataInfo != NULL);
for (uint32_t i = 0; i < pCreateInfo->pKeysAndDataInfo->binaryCount; i++) {
vk_outarray_append_typed(VkPipelineBinaryKHR, &out, binary) {
VkResult result = vk_create_pipeline_binary(
device,
pCreateInfo->pKeysAndDataInfo->pPipelineBinaryKeys[i].key,
pCreateInfo->pKeysAndDataInfo->pPipelineBinaryKeys[i].keySize,
pCreateInfo->pKeysAndDataInfo->pPipelineBinaryData[i].pData,
pCreateInfo->pKeysAndDataInfo->pPipelineBinaryData[i].dataSize,
pAllocator, binary);
if (result != VK_SUCCESS) {
*binary = VK_NULL_HANDLE;
if (success_or_first_fail == VK_SUCCESS)
success_or_first_fail = result;
}
}
}
}
return success_or_first_fail != VK_SUCCESS ?
success_or_first_fail : vk_outarray_status(&out);
}
VKAPI_ATTR void VKAPI_CALL
vk_common_DestroyPipelineBinaryKHR(
VkDevice _device,
VkPipelineBinaryKHR pipelineBinary,
const VkAllocationCallbacks* pAllocator)
{
VK_FROM_HANDLE(vk_device, device, _device);
VK_FROM_HANDLE(vk_pipeline_binary, binary, pipelineBinary);
if (binary == NULL)
return;
vk_object_free(device, pAllocator, binary);
}
VKAPI_ATTR VkResult VKAPI_CALL
vk_common_GetPipelineKeyKHR(
VkDevice _device,
const VkPipelineCreateInfoKHR* pPipelineCreateInfo,
VkPipelineBinaryKeyKHR* pPipelineKey)
{
VK_FROM_HANDLE(vk_device, device, _device);
STATIC_ASSERT(sizeof(pPipelineKey->key) == sizeof(blake3_hash));
if (pPipelineCreateInfo == NULL) {
struct vk_physical_device *physical_device = device->physical;
_mesa_blake3_compute(physical_device->properties.shaderBinaryUUID,
sizeof(physical_device->properties.shaderBinaryUUID),
pPipelineKey->key);
pPipelineKey->keySize = sizeof(blake3_hash);
return VK_SUCCESS;
}
const VkBaseInStructure *next = pPipelineCreateInfo->pNext;
struct mesa_blake3 blake3_ctx;
_mesa_blake3_init(&blake3_ctx);
switch (next->sType) {
case VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO: {
struct vk_graphics_pipeline_state state_tmp;
struct vk_graphics_pipeline_all_state all_state_tmp;
memset(&state_tmp, 0, sizeof(state_tmp));
struct vk_graphics_pipeline_compile_info info;
vk_get_graphics_pipeline_compile_info(&info, device,
&state_tmp, &all_state_tmp,
pPipelineCreateInfo->pNext);
for (uint32_t i = 0; i < info.stage_count; i++) {
_mesa_blake3_update(&blake3_ctx, &info.stages[i].shader_key,
sizeof(info.stages[i].shader_key));
}
vk_release_graphics_pipeline_compile_info(&info, device, NULL);
break;
}
case VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO: {
struct vk_pipeline_stage info;
vk_get_compute_pipeline_compile_info(&info, device,
pPipelineCreateInfo->pNext);
_mesa_blake3_update(&blake3_ctx, &info.shader_key,
sizeof(info.shader_key));
vk_pipeline_stage_finish(device, &info);
break;
}
case VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR: {
struct vk_rt_pipeline_compile_info info;
VkResult result =
vk_get_rt_pipeline_compile_info(&info, device,
pPipelineCreateInfo->pNext, NULL);
if (result != VK_SUCCESS)
return result;
for (uint32_t i = 0; i < info.stage_count; i++) {
_mesa_blake3_update(&blake3_ctx, &info.stages[i].shader_key,
sizeof(info.stages[i].shader_key));
}
for (uint32_t i = 0; i < info.group_count; i++) {
for (uint32_t s = 0; s < info.groups[i].stage_count; s++) {
if (!info.groups[i].stages[s].linked)
continue;
_mesa_blake3_update(&blake3_ctx,
&info.groups[i].stages[s].shader_key,
sizeof(info.groups[i].stages[s].shader_key));
}
}
vk_release_rt_pipeline_compile_info(&info, device, NULL);
break;
}
default:
UNREACHABLE("Unsupported pNext");
}
pPipelineKey->keySize = sizeof(blake3_hash);
_mesa_blake3_final(&blake3_ctx, pPipelineKey->key);
return VK_SUCCESS;
}
VKAPI_ATTR VkResult VKAPI_CALL
vk_common_GetPipelineBinaryDataKHR(
VkDevice device,
const VkPipelineBinaryDataInfoKHR* pInfo,
VkPipelineBinaryKeyKHR* pPipelineBinaryKey,
size_t* pPipelineBinaryDataSize,
void* pPipelineBinaryData)
{
VK_FROM_HANDLE(vk_pipeline_binary, binary, pInfo->pipelineBinary);
pPipelineBinaryKey->keySize = sizeof(binary->key);
memcpy(pPipelineBinaryKey->key, binary->key, sizeof(binary->key));
if (*pPipelineBinaryDataSize == 0) {
*pPipelineBinaryDataSize = binary->size;
return VK_SUCCESS;
}
VkResult result =
*pPipelineBinaryDataSize < binary->size ?
VK_ERROR_NOT_ENOUGH_SPACE_KHR : VK_SUCCESS;
*pPipelineBinaryDataSize = binary->size;
if (result == VK_SUCCESS)
memcpy(pPipelineBinaryData, binary->data, binary->size);
return result;
}
VKAPI_ATTR VkResult VKAPI_CALL
vk_common_ReleaseCapturedPipelineDataKHR(
VkDevice device,
const VkReleaseCapturedPipelineDataInfoKHR* pInfo,
const VkAllocationCallbacks* pAllocator)
{
/* NO-OP */
return VK_SUCCESS;
}