v3dv: cache NIR shaders in the pipeline cache

Summary (commentary preceding the first "diff --git" header):

* v3dv_device.c: v3dv_CreateInstance stores the boolean environment
  variable V3DV_ENABLE_PIPELINE_CACHE (default: true) in
  instance->pipeline_cache_enabled.
* v3dv_pipeline.c: pipeline_hash_shader() computes a SHA-1 over the module
  sha1, the entrypoint name, the shader stage and the specialization info.
  pipeline_stage_get_nir() searches the pipeline cache with that key and,
  when nothing is found, compiles the module to NIR and uploads the result.
  The graphics and compute create paths pass the VkPipelineCache down to it.
* v3dv_pipeline_cache.c: adds v3dv_pipeline_cache_upload_nir() and
  v3dv_pipeline_cache_search_for_nir(), which store/load serialized NIR in
  a hash table keyed by the 20-byte SHA-1, guarded by cache->mutex. Both
  return early when the cache pointer or its nir_cache table is NULL.
  The table is only created when the instance has the cache enabled and is
  released in v3dv_DestroyPipelineCache.
* v3dv_private.h: new fields and prototypes used by the above.

diff --git a/src/broadcom/vulkan/v3dv_device.c b/src/broadcom/vulkan/v3dv_device.c index 7ebf10b8af2..a509eede5e1 100644 --- a/src/broadcom/vulkan/v3dv_device.c +++ b/src/broadcom/vulkan/v3dv_device.c @@ -45,6 +45,7 @@ #include "vk_util.h" #include "util/build_id.h" +#include "util/debug.h" #ifdef VK_USE_PLATFORM_XCB_KHR #include @@ -220,6 +221,9 @@ v3dv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, return vk_error(NULL, result); } + instance->pipeline_cache_enabled = + env_var_as_boolean("V3DV_ENABLE_PIPELINE_CACHE", true); + glsl_type_singleton_init_or_ref(); VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c index cfe7fc74c40..9ce41e2a6c2 100644 --- a/src/broadcom/vulkan/v3dv_pipeline.c +++ b/src/broadcom/vulkan/v3dv_pipeline.c @@ -1270,6 +1270,7 @@ pipeline_stage_create_vs_bin(const struct v3dv_pipeline_stage *src, p_stage->module = src->module; p_stage->nir = nir_shader_clone(NULL, src->nir); p_stage->spec_info = src->spec_info; + memcpy(p_stage->shader_sha1, src->shader_sha1, 20); /* Technically we could share the hash_table, but having their own makes * destroy p_stage more straightforward @@ -1567,8 +1568,61 @@ get_ucp_enable_mask(struct v3dv_pipeline_stage **stages) return 0; } +static nir_shader* +pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage, + struct v3dv_pipeline *pipeline, + struct v3dv_pipeline_cache *cache) +{ + nir_shader *nir = NULL; + + nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache, + &v3dv_nir_options, + p_stage->shader_sha1); + + if (nir) { + assert(nir->info.stage == p_stage->stage); + return nir; + } + + nir = shader_module_compile_to_nir(pipeline->device, p_stage); + + if (nir) { + v3dv_pipeline_cache_upload_nir(pipeline, cache, nir, + p_stage->shader_sha1); + return nir; + } + + /* FIXME: this shouldn't happen, raise error? 
*/ + return NULL; +} + +static void +pipeline_hash_shader(const struct v3dv_shader_module *module, + const char *entrypoint, + gl_shader_stage stage, + const VkSpecializationInfo *spec_info, + unsigned char *sha1_out) +{ + struct mesa_sha1 ctx; + _mesa_sha1_init(&ctx); + + _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1)); + _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint)); + _mesa_sha1_update(&ctx, &stage, sizeof(stage)); + if (spec_info) { + _mesa_sha1_update(&ctx, spec_info->pMapEntries, + spec_info->mapEntryCount * + sizeof(*spec_info->pMapEntries)); + _mesa_sha1_update(&ctx, spec_info->pData, + spec_info->dataSize); + } + + _mesa_sha1_final(&ctx, sha1_out); +} + static VkResult pipeline_compile_graphics(struct v3dv_pipeline *pipeline, + struct v3dv_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator) { @@ -1607,12 +1661,15 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline, p_stage->module = v3dv_shader_module_from_handle(sinfo->module); p_stage->spec_info = sinfo->pSpecializationInfo; + pipeline_hash_shader(p_stage->module, + p_stage->entrypoint, + stage, + p_stage->spec_info, + p_stage->shader_sha1); + pipeline->active_stages |= sinfo->stage; - /* FIXME: when cache support is in place, first check if for the given - * spirv module and options, we already have a nir shader. 
- */ - p_stage->nir = shader_module_compile_to_nir(pipeline->device, p_stage); + p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache); stages[stage] = p_stage; } @@ -2519,6 +2576,7 @@ pack_shader_state_attribute_record(struct v3dv_pipeline *pipeline, static VkResult pipeline_init(struct v3dv_pipeline *pipeline, struct v3dv_device *device, + struct v3dv_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator) { @@ -2570,7 +2628,7 @@ pipeline_init(struct v3dv_pipeline *pipeline, pipeline->primitive_restart = pCreateInfo->pInputAssemblyState->primitiveRestartEnable; - result = pipeline_compile_graphics(pipeline, pCreateInfo, pAllocator); + result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator); if (result != VK_SUCCESS) { /* Caller would already destroy the pipeline, and we didn't allocate any @@ -2632,6 +2690,7 @@ graphics_pipeline_create(VkDevice _device, VkPipeline *pPipeline) { V3DV_FROM_HANDLE(v3dv_device, device, _device); + V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache); struct v3dv_pipeline *pipeline; VkResult result; @@ -2641,7 +2700,7 @@ graphics_pipeline_create(VkDevice _device, if (pipeline == NULL) return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - result = pipeline_init(pipeline, device, + result = pipeline_init(pipeline, device, cache, pCreateInfo, pAllocator); @@ -2706,6 +2765,7 @@ lower_cs_shared(struct nir_shader *nir) static VkResult pipeline_compile_compute(struct v3dv_pipeline *pipeline, + struct v3dv_pipeline_cache *cache, const VkComputePipelineCreateInfo *info, const VkAllocationCallbacks *alloc) { @@ -2730,7 +2790,14 @@ pipeline_compile_compute(struct v3dv_pipeline *pipeline, p_stage->entrypoint = sinfo->pName; p_stage->module = v3dv_shader_module_from_handle(sinfo->module); p_stage->spec_info = sinfo->pSpecializationInfo; - p_stage->nir = shader_module_compile_to_nir(pipeline->device, p_stage); + 
pipeline_hash_shader(p_stage->module, + p_stage->entrypoint, + stage, + p_stage->spec_info, + p_stage->shader_sha1); + + p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache); pipeline->active_stages |= sinfo->stage; st_nir_opts(p_stage->nir); @@ -2752,6 +2819,7 @@ pipeline_compile_compute(struct v3dv_pipeline *pipeline, static VkResult compute_pipeline_init(struct v3dv_pipeline *pipeline, struct v3dv_device *device, + struct v3dv_pipeline_cache *cache, const VkComputePipelineCreateInfo *info, const VkAllocationCallbacks *alloc) { @@ -2760,7 +2828,7 @@ compute_pipeline_init(struct v3dv_pipeline *pipeline, pipeline->device = device; pipeline->layout = layout; - VkResult result = pipeline_compile_compute(pipeline, info, alloc); + VkResult result = pipeline_compile_compute(pipeline, cache, info, alloc); return result; } @@ -2773,6 +2841,7 @@ compute_pipeline_create(VkDevice _device, VkPipeline *pPipeline) { V3DV_FROM_HANDLE(v3dv_device, device, _device); + V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache); struct v3dv_pipeline *pipeline; VkResult result; @@ -2782,7 +2851,8 @@ compute_pipeline_create(VkDevice _device, if (pipeline == NULL) return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - result = compute_pipeline_init(pipeline, device, pCreateInfo, pAllocator); + result = compute_pipeline_init(pipeline, device, cache, + pCreateInfo, pAllocator); if (result != VK_SUCCESS) { v3dv_destroy_pipeline(pipeline, device, pAllocator); return result; diff --git a/src/broadcom/vulkan/v3dv_pipeline_cache.c b/src/broadcom/vulkan/v3dv_pipeline_cache.c index d7718f830b2..d0874d1326a 100644 --- a/src/broadcom/vulkan/v3dv_pipeline_cache.c +++ b/src/broadcom/vulkan/v3dv_pipeline_cache.c @@ -23,16 +23,173 @@ #include "v3dv_private.h" #include "vulkan/util/vk_util.h" +#include "util/blob.h" +#include "nir/nir_serialize.h" +static const bool dump_stats = false; +static const bool dump_stats_verbose = false; + +static uint32_t +sha1_hash_func(const void *sha1) 
+{ + return _mesa_hash_data(sha1, 20); +} + +static bool +sha1_compare_func(const void *sha1_a, const void *sha1_b) +{ + return memcmp(sha1_a, sha1_b, 20) == 0; +} + +struct serialized_nir { + unsigned char sha1_key[20]; + size_t size; + char data[]; +}; + +static void +cache_dump_stats(struct v3dv_pipeline_cache *cache) +{ + if (!dump_stats_verbose) + return; + + fprintf(stderr, " NIR cache entries: %u\n", cache->nir_stats.count); + fprintf(stderr, " NIR cache miss count: %u\n", cache->nir_stats.miss); + fprintf(stderr, " NIR cache hit count: %u\n", cache->nir_stats.hit); +} + +void +v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline, + struct v3dv_pipeline_cache *cache, + nir_shader *nir, + unsigned char sha1_key[20]) +{ + if (!cache || !cache->nir_cache) + return; + + pthread_mutex_lock(&cache->mutex); + struct hash_entry *entry = + _mesa_hash_table_search(cache->nir_cache, sha1_key); + pthread_mutex_unlock(&cache->mutex); + if (entry) + return; + + struct blob blob; + blob_init(&blob); + + nir_serialize(&blob, nir, false); + if (blob.out_of_memory) { + blob_finish(&blob); + return; + } + + pthread_mutex_lock(&cache->mutex); + /* Because ralloc isn't thread-safe, we have to do all this inside the + * lock. We could unlock for the big memcpy but it's probably not worth + * the hassle. 
+ */ + entry = _mesa_hash_table_search(cache->nir_cache, sha1_key); + if (entry) { + blob_finish(&blob); + pthread_mutex_unlock(&cache->mutex); + return; + } + + struct serialized_nir *snir = + ralloc_size(cache->nir_cache, sizeof(*snir) + blob.size); + memcpy(snir->sha1_key, sha1_key, 20); + snir->size = blob.size; + memcpy(snir->data, blob.data, blob.size); + + blob_finish(&blob); + + if (unlikely(dump_stats)) { + char sha1buf[41]; + _mesa_sha1_format(sha1buf, snir->sha1_key); + fprintf(stderr, "pipeline cache %p, new nir entry %s\n", cache, sha1buf); + + cache->nir_stats.count++; + cache_dump_stats(cache); + } + + _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir); + + pthread_mutex_unlock(&cache->mutex); +} + +nir_shader* +v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline, + struct v3dv_pipeline_cache *cache, + const nir_shader_compiler_options *nir_options, + unsigned char sha1_key[20]) +{ + if (!cache || !cache->nir_cache) + return NULL; + + if (unlikely(dump_stats)) { + char sha1buf[41]; + _mesa_sha1_format(sha1buf, sha1_key); + + fprintf(stderr, "pipeline cache %p, search for nir %s\n", cache, sha1buf); + } + + const struct serialized_nir *snir = NULL; + + pthread_mutex_lock(&cache->mutex); + struct hash_entry *entry = + _mesa_hash_table_search(cache->nir_cache, sha1_key); + if (entry) + snir = entry->data; + pthread_mutex_unlock(&cache->mutex); + + if (snir) { + struct blob_reader blob; + blob_reader_init(&blob, snir->data, snir->size); + + /* We use context NULL as we want the p_stage to keep the reference to + * nir, as we keep open the possibility of providing a shader variant + * after cache creation + */ + nir_shader *nir = nir_deserialize(NULL, nir_options, &blob); + if (blob.overrun) { + ralloc_free(nir); + } else { + if (unlikely(dump_stats)) { + cache->nir_stats.hit++; + cache_dump_stats(cache); + } + return nir; + } + } + + if (unlikely(dump_stats)) { + cache->nir_stats.miss++; + cache_dump_stats(cache); + } + + return 
NULL; +} static void pipeline_cache_init(struct v3dv_pipeline_cache *cache, - struct v3dv_device *device) + struct v3dv_device *device, + bool cache_enabled) { cache->_loader_data.loaderMagic = ICD_LOADER_MAGIC; cache->device = device; pthread_mutex_init(&cache->mutex, NULL); + + if (cache_enabled) { + cache->nir_cache = _mesa_hash_table_create(NULL, sha1_hash_func, + sha1_compare_func); + cache->nir_stats.miss = 0; + cache->nir_stats.hit = 0; + cache->nir_stats.count = 0; + } else { + cache->nir_cache = NULL; + } + } static void @@ -82,7 +239,8 @@ v3dv_CreatePipelineCache(VkDevice _device, if (cache == NULL) return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - pipeline_cache_init(cache, device); + pipeline_cache_init(cache, device, + device->instance->pipeline_cache_enabled); if (pCreateInfo->initialDataSize > 0) { pipeline_cache_load(cache, @@ -108,6 +266,13 @@ v3dv_DestroyPipelineCache(VkDevice _device, pthread_mutex_destroy(&cache->mutex); + if (cache->nir_cache) { + hash_table_foreach(cache->nir_cache, entry) + ralloc_free(entry->data); + + _mesa_hash_table_destroy(cache->nir_cache, NULL); + } + vk_free2(&device->alloc, pAllocator, cache); } diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h index 520f6f7075f..143d62cc10e 100644 --- a/src/broadcom/vulkan/v3dv_private.h +++ b/src/broadcom/vulkan/v3dv_private.h @@ -200,6 +200,8 @@ struct v3dv_instance { struct v3dv_physical_device physicalDevice; struct vk_debug_report_instance debug_report_callbacks; + + bool pipeline_cache_enabled; }; /* Tracks wait threads spawned from a single vkQueueSubmit call */ @@ -261,11 +263,20 @@ struct v3dv_meta_blit_pipeline { #define V3DV_META_BLIT_CACHE_KEY_SIZE (3 * sizeof(uint32_t)) +struct v3dv_pipeline_cache_stats { + uint32_t miss; + uint32_t hit; + uint32_t count; +}; + struct v3dv_pipeline_cache { VK_LOADER_DATA _loader_data; struct v3dv_device *device; mtx_t mutex; + + struct hash_table *nir_cache; + struct 
v3dv_pipeline_cache_stats nir_stats; }; struct v3dv_device { @@ -1245,6 +1256,9 @@ struct v3dv_pipeline_stage { nir_shader *nir; + /* The following is the combined hash of module+entrypoint+stage+spec_info */ + unsigned char shader_sha1[20]; + /** A name for this program, so you can track it in shader-db output. */ uint32_t program_id; /** How many variants of this program were compiled, for shader-db. */ @@ -1739,6 +1753,17 @@ v3dv_immutable_samplers(const struct v3dv_descriptor_set_layout *set, return (const struct v3dv_sampler *) ((const char *) set + binding->immutable_samplers_offset); } +void v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline, + struct v3dv_pipeline_cache *cache, + nir_shader *nir, + unsigned char sha1_key[20]); + +nir_shader* v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline, + struct v3dv_pipeline_cache *cache, + const nir_shader_compiler_options *nir_options, + unsigned char sha1_key[20]); + + #define V3DV_DEFINE_HANDLE_CASTS(__v3dv_type, __VkType) \ \ static inline struct __v3dv_type * \