v3dv/pipeline_cache: cache nir shaders
Heavily based on anv nir caching. One of the bigger differences is that we don't create the nir shader using a ralloc_context local to the main compile graphics method. On anv, after compiling the shader, they discard the nir shader. We need to keep it, as we might need it to build shader variants later. As with anv, we introduce an environment variable to disable the cache: V3DV_ENABLE_PIPELINE_CACHE. It is enabled by default. The main purpose of this envvar is debugging, in order to provide an easy way to rule out a bug in the cache. Serializing/deserializing the NIR shaders as part of GetPipelineCacheData and PipelineCacheCreate is still pending. We also plan to cache shader variants too. We will do that in following patches. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6766>
This commit is contained in:
committed by
Marge Bot
parent
1d2ae8756f
commit
2326d5bc04
@@ -45,6 +45,7 @@
|
||||
#include "vk_util.h"
|
||||
|
||||
#include "util/build_id.h"
|
||||
#include "util/debug.h"
|
||||
|
||||
#ifdef VK_USE_PLATFORM_XCB_KHR
|
||||
#include <xcb/xcb.h>
|
||||
@@ -220,6 +221,9 @@ v3dv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
|
||||
return vk_error(NULL, result);
|
||||
}
|
||||
|
||||
instance->pipeline_cache_enabled =
|
||||
env_var_as_boolean("V3DV_ENABLE_PIPELINE_CACHE", true);
|
||||
|
||||
glsl_type_singleton_init_or_ref();
|
||||
|
||||
VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
|
||||
|
||||
@@ -1270,6 +1270,7 @@ pipeline_stage_create_vs_bin(const struct v3dv_pipeline_stage *src,
|
||||
p_stage->module = src->module;
|
||||
p_stage->nir = nir_shader_clone(NULL, src->nir);
|
||||
p_stage->spec_info = src->spec_info;
|
||||
memcpy(p_stage->shader_sha1, src->shader_sha1, 20);
|
||||
|
||||
/* Technically we could share the hash_table, but having their own makes
|
||||
* destroy p_stage more straightforward
|
||||
@@ -1567,8 +1568,61 @@ get_ucp_enable_mask(struct v3dv_pipeline_stage **stages)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static nir_shader*
|
||||
pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage,
|
||||
struct v3dv_pipeline *pipeline,
|
||||
struct v3dv_pipeline_cache *cache)
|
||||
{
|
||||
nir_shader *nir = NULL;
|
||||
|
||||
nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache,
|
||||
&v3dv_nir_options,
|
||||
p_stage->shader_sha1);
|
||||
|
||||
if (nir) {
|
||||
assert(nir->info.stage == p_stage->stage);
|
||||
return nir;
|
||||
}
|
||||
|
||||
nir = shader_module_compile_to_nir(pipeline->device, p_stage);
|
||||
|
||||
if (nir) {
|
||||
v3dv_pipeline_cache_upload_nir(pipeline, cache, nir,
|
||||
p_stage->shader_sha1);
|
||||
return nir;
|
||||
}
|
||||
|
||||
/* FIXME: this shouldn't happen, raise error? */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
pipeline_hash_shader(const struct v3dv_shader_module *module,
|
||||
const char *entrypoint,
|
||||
gl_shader_stage stage,
|
||||
const VkSpecializationInfo *spec_info,
|
||||
unsigned char *sha1_out)
|
||||
{
|
||||
struct mesa_sha1 ctx;
|
||||
_mesa_sha1_init(&ctx);
|
||||
|
||||
_mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
|
||||
_mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
|
||||
_mesa_sha1_update(&ctx, &stage, sizeof(stage));
|
||||
if (spec_info) {
|
||||
_mesa_sha1_update(&ctx, spec_info->pMapEntries,
|
||||
spec_info->mapEntryCount *
|
||||
sizeof(*spec_info->pMapEntries));
|
||||
_mesa_sha1_update(&ctx, spec_info->pData,
|
||||
spec_info->dataSize);
|
||||
}
|
||||
|
||||
_mesa_sha1_final(&ctx, sha1_out);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
|
||||
struct v3dv_pipeline_cache *cache,
|
||||
const VkGraphicsPipelineCreateInfo *pCreateInfo,
|
||||
const VkAllocationCallbacks *pAllocator)
|
||||
{
|
||||
@@ -1607,12 +1661,15 @@ pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
|
||||
p_stage->module = v3dv_shader_module_from_handle(sinfo->module);
|
||||
p_stage->spec_info = sinfo->pSpecializationInfo;
|
||||
|
||||
pipeline_hash_shader(p_stage->module,
|
||||
p_stage->entrypoint,
|
||||
stage,
|
||||
p_stage->spec_info,
|
||||
p_stage->shader_sha1);
|
||||
|
||||
pipeline->active_stages |= sinfo->stage;
|
||||
|
||||
/* FIXME: when cache support is in place, first check if for the given
|
||||
* spirv module and options, we already have a nir shader.
|
||||
*/
|
||||
p_stage->nir = shader_module_compile_to_nir(pipeline->device, p_stage);
|
||||
p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);
|
||||
|
||||
stages[stage] = p_stage;
|
||||
}
|
||||
@@ -2519,6 +2576,7 @@ pack_shader_state_attribute_record(struct v3dv_pipeline *pipeline,
|
||||
static VkResult
|
||||
pipeline_init(struct v3dv_pipeline *pipeline,
|
||||
struct v3dv_device *device,
|
||||
struct v3dv_pipeline_cache *cache,
|
||||
const VkGraphicsPipelineCreateInfo *pCreateInfo,
|
||||
const VkAllocationCallbacks *pAllocator)
|
||||
{
|
||||
@@ -2570,7 +2628,7 @@ pipeline_init(struct v3dv_pipeline *pipeline,
|
||||
pipeline->primitive_restart =
|
||||
pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
|
||||
|
||||
result = pipeline_compile_graphics(pipeline, pCreateInfo, pAllocator);
|
||||
result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator);
|
||||
|
||||
if (result != VK_SUCCESS) {
|
||||
/* Caller would already destroy the pipeline, and we didn't allocate any
|
||||
@@ -2632,6 +2690,7 @@ graphics_pipeline_create(VkDevice _device,
|
||||
VkPipeline *pPipeline)
|
||||
{
|
||||
V3DV_FROM_HANDLE(v3dv_device, device, _device);
|
||||
V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
|
||||
|
||||
struct v3dv_pipeline *pipeline;
|
||||
VkResult result;
|
||||
@@ -2641,7 +2700,7 @@ graphics_pipeline_create(VkDevice _device,
|
||||
if (pipeline == NULL)
|
||||
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
result = pipeline_init(pipeline, device,
|
||||
result = pipeline_init(pipeline, device, cache,
|
||||
pCreateInfo,
|
||||
pAllocator);
|
||||
|
||||
@@ -2706,6 +2765,7 @@ lower_cs_shared(struct nir_shader *nir)
|
||||
|
||||
static VkResult
|
||||
pipeline_compile_compute(struct v3dv_pipeline *pipeline,
|
||||
struct v3dv_pipeline_cache *cache,
|
||||
const VkComputePipelineCreateInfo *info,
|
||||
const VkAllocationCallbacks *alloc)
|
||||
{
|
||||
@@ -2730,7 +2790,14 @@ pipeline_compile_compute(struct v3dv_pipeline *pipeline,
|
||||
p_stage->entrypoint = sinfo->pName;
|
||||
p_stage->module = v3dv_shader_module_from_handle(sinfo->module);
|
||||
p_stage->spec_info = sinfo->pSpecializationInfo;
|
||||
p_stage->nir = shader_module_compile_to_nir(pipeline->device, p_stage);
|
||||
|
||||
pipeline_hash_shader(p_stage->module,
|
||||
p_stage->entrypoint,
|
||||
stage,
|
||||
p_stage->spec_info,
|
||||
p_stage->shader_sha1);
|
||||
|
||||
p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);
|
||||
|
||||
pipeline->active_stages |= sinfo->stage;
|
||||
st_nir_opts(p_stage->nir);
|
||||
@@ -2752,6 +2819,7 @@ pipeline_compile_compute(struct v3dv_pipeline *pipeline,
|
||||
static VkResult
|
||||
compute_pipeline_init(struct v3dv_pipeline *pipeline,
|
||||
struct v3dv_device *device,
|
||||
struct v3dv_pipeline_cache *cache,
|
||||
const VkComputePipelineCreateInfo *info,
|
||||
const VkAllocationCallbacks *alloc)
|
||||
{
|
||||
@@ -2760,7 +2828,7 @@ compute_pipeline_init(struct v3dv_pipeline *pipeline,
|
||||
pipeline->device = device;
|
||||
pipeline->layout = layout;
|
||||
|
||||
VkResult result = pipeline_compile_compute(pipeline, info, alloc);
|
||||
VkResult result = pipeline_compile_compute(pipeline, cache, info, alloc);
|
||||
|
||||
return result;
|
||||
}
|
||||
@@ -2773,6 +2841,7 @@ compute_pipeline_create(VkDevice _device,
|
||||
VkPipeline *pPipeline)
|
||||
{
|
||||
V3DV_FROM_HANDLE(v3dv_device, device, _device);
|
||||
V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);
|
||||
|
||||
struct v3dv_pipeline *pipeline;
|
||||
VkResult result;
|
||||
@@ -2782,7 +2851,8 @@ compute_pipeline_create(VkDevice _device,
|
||||
if (pipeline == NULL)
|
||||
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
result = compute_pipeline_init(pipeline, device, pCreateInfo, pAllocator);
|
||||
result = compute_pipeline_init(pipeline, device, cache,
|
||||
pCreateInfo, pAllocator);
|
||||
if (result != VK_SUCCESS) {
|
||||
v3dv_destroy_pipeline(pipeline, device, pAllocator);
|
||||
return result;
|
||||
|
||||
@@ -23,16 +23,173 @@
|
||||
|
||||
#include "v3dv_private.h"
|
||||
#include "vulkan/util/vk_util.h"
|
||||
#include "util/blob.h"
|
||||
#include "nir/nir_serialize.h"
|
||||
|
||||
/* Debug switch: when true, NIR cache searches and insertions are logged to
 * stderr and the hit/miss/count statistics of the cache are updated.
 */
static const bool dump_stats = false;
|
||||
/* Debug switch: when true, cache_dump_stats() prints the NIR cache counters.
 * In the code visible here it is only reached from paths already gated by
 * dump_stats.
 */
static const bool dump_stats_verbose = false;
|
||||
|
||||
/* Hash-table hash callback: hashes the 20 bytes of a SHA-1 key. */
static uint32_t
sha1_hash_func(const void *sha1)
{
   const uint32_t hash = _mesa_hash_data(sha1, 20);

   return hash;
}
|
||||
|
||||
/* Hash-table equality callback: true when both 20-byte SHA-1 keys match. */
static bool
sha1_compare_func(const void *sha1_a, const void *sha1_b)
{
   const int diff = memcmp(sha1_a, sha1_b, 20);

   return !diff;
}
|
||||
|
||||
/*
 * One NIR cache entry: a serialized NIR shader together with the SHA-1 key
 * it is stored under.
 *
 * The serialized bytes live directly after the header, so an entry must be
 * allocated with sizeof(struct serialized_nir) + size bytes.
 */
struct serialized_nir {
   /* SHA-1 used as the hash-table key for this entry. */
   unsigned char sha1_key[20];
   /* Number of bytes stored in data. */
   size_t size;
   /* Serialized NIR; a C99 flexible array member rather than the GNU
    * zero-length array extension.
    */
   char data[];
};
|
||||
|
||||
static void
|
||||
cache_dump_stats(struct v3dv_pipeline_cache *cache)
|
||||
{
|
||||
if (!dump_stats_verbose)
|
||||
return;
|
||||
|
||||
fprintf(stderr, " NIR cache entries: %d\n", cache->nir_stats.count);
|
||||
fprintf(stderr, " NIR cache miss count: %d\n", cache->nir_stats.miss);
|
||||
fprintf(stderr, " NIR cache hit count: %d\n", cache->nir_stats.hit);
|
||||
}
|
||||
|
||||
void
|
||||
v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
|
||||
struct v3dv_pipeline_cache *cache,
|
||||
nir_shader *nir,
|
||||
unsigned char sha1_key[20])
|
||||
{
|
||||
if (!cache || !cache->nir_cache)
|
||||
return;
|
||||
|
||||
pthread_mutex_lock(&cache->mutex);
|
||||
struct hash_entry *entry =
|
||||
_mesa_hash_table_search(cache->nir_cache, sha1_key);
|
||||
pthread_mutex_unlock(&cache->mutex);
|
||||
if (entry)
|
||||
return;
|
||||
|
||||
struct blob blob;
|
||||
blob_init(&blob);
|
||||
|
||||
nir_serialize(&blob, nir, false);
|
||||
if (blob.out_of_memory) {
|
||||
blob_finish(&blob);
|
||||
return;
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&cache->mutex);
|
||||
/* Because ralloc isn't thread-safe, we have to do all this inside the
|
||||
* lock. We could unlock for the big memcpy but it's probably not worth
|
||||
* the hassle.
|
||||
*/
|
||||
entry = _mesa_hash_table_search(cache->nir_cache, sha1_key);
|
||||
if (entry) {
|
||||
blob_finish(&blob);
|
||||
pthread_mutex_unlock(&cache->mutex);
|
||||
return;
|
||||
}
|
||||
|
||||
struct serialized_nir *snir =
|
||||
ralloc_size(cache->nir_cache, sizeof(*snir) + blob.size);
|
||||
memcpy(snir->sha1_key, sha1_key, 20);
|
||||
snir->size = blob.size;
|
||||
memcpy(snir->data, blob.data, blob.size);
|
||||
|
||||
blob_finish(&blob);
|
||||
|
||||
if (unlikely(dump_stats)) {
|
||||
char sha1buf[41];
|
||||
_mesa_sha1_format(sha1buf, snir->sha1_key);
|
||||
fprintf(stderr, "pipeline cache %p, new nir entry %s\n", cache, sha1buf);
|
||||
|
||||
cache->nir_stats.count++;
|
||||
cache_dump_stats(cache);
|
||||
}
|
||||
|
||||
_mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);
|
||||
|
||||
pthread_mutex_unlock(&cache->mutex);
|
||||
}
|
||||
|
||||
nir_shader*
|
||||
v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
|
||||
struct v3dv_pipeline_cache *cache,
|
||||
const nir_shader_compiler_options *nir_options,
|
||||
unsigned char sha1_key[20])
|
||||
{
|
||||
if (!cache || !cache->nir_cache)
|
||||
return NULL;
|
||||
|
||||
if (unlikely(dump_stats)) {
|
||||
char sha1buf[41];
|
||||
_mesa_sha1_format(sha1buf, sha1_key);
|
||||
|
||||
fprintf(stderr, "pipeline cache %p, search for nir %s\n", cache, sha1buf);
|
||||
}
|
||||
|
||||
const struct serialized_nir *snir = NULL;
|
||||
|
||||
pthread_mutex_lock(&cache->mutex);
|
||||
struct hash_entry *entry =
|
||||
_mesa_hash_table_search(cache->nir_cache, sha1_key);
|
||||
if (entry)
|
||||
snir = entry->data;
|
||||
pthread_mutex_unlock(&cache->mutex);
|
||||
|
||||
if (snir) {
|
||||
struct blob_reader blob;
|
||||
blob_reader_init(&blob, snir->data, snir->size);
|
||||
|
||||
/* We use context NULL as we want the p_stage to keep the reference to
|
||||
* nir, as we keep open the possibility of provide a shader variant
|
||||
* after cache creation
|
||||
*/
|
||||
nir_shader *nir = nir_deserialize(NULL, nir_options, &blob);
|
||||
if (blob.overrun) {
|
||||
ralloc_free(nir);
|
||||
} else {
|
||||
if (unlikely(dump_stats)) {
|
||||
cache->nir_stats.hit++;
|
||||
cache_dump_stats(cache);
|
||||
}
|
||||
return nir;
|
||||
}
|
||||
}
|
||||
|
||||
if (unlikely(dump_stats)) {
|
||||
cache->nir_stats.miss++;
|
||||
cache_dump_stats(cache);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
pipeline_cache_init(struct v3dv_pipeline_cache *cache,
|
||||
struct v3dv_device *device)
|
||||
struct v3dv_device *device,
|
||||
bool cache_enabled)
|
||||
{
|
||||
cache->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
|
||||
|
||||
cache->device = device;
|
||||
pthread_mutex_init(&cache->mutex, NULL);
|
||||
|
||||
if (cache_enabled) {
|
||||
cache->nir_cache = _mesa_hash_table_create(NULL, sha1_hash_func,
|
||||
sha1_compare_func);
|
||||
cache->nir_stats.miss = 0;
|
||||
cache->nir_stats.hit = 0;
|
||||
cache->nir_stats.count = 0;
|
||||
} else {
|
||||
cache->nir_cache = NULL;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -82,7 +239,8 @@ v3dv_CreatePipelineCache(VkDevice _device,
|
||||
if (cache == NULL)
|
||||
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
pipeline_cache_init(cache, device);
|
||||
pipeline_cache_init(cache, device,
|
||||
device->instance->pipeline_cache_enabled);
|
||||
|
||||
if (pCreateInfo->initialDataSize > 0) {
|
||||
pipeline_cache_load(cache,
|
||||
@@ -108,6 +266,13 @@ v3dv_DestroyPipelineCache(VkDevice _device,
|
||||
|
||||
pthread_mutex_destroy(&cache->mutex);
|
||||
|
||||
if (cache->nir_cache) {
|
||||
hash_table_foreach(cache->nir_cache, entry)
|
||||
ralloc_free(entry->data);
|
||||
|
||||
_mesa_hash_table_destroy(cache->nir_cache, NULL);
|
||||
}
|
||||
|
||||
vk_free2(&device->alloc, pAllocator, cache);
|
||||
}
|
||||
|
||||
|
||||
@@ -200,6 +200,8 @@ struct v3dv_instance {
|
||||
struct v3dv_physical_device physicalDevice;
|
||||
|
||||
struct vk_debug_report_instance debug_report_callbacks;
|
||||
|
||||
bool pipeline_cache_enabled;
|
||||
};
|
||||
|
||||
/* Tracks wait threads spawned from a single vkQueueSubmit call */
|
||||
@@ -261,11 +263,20 @@ struct v3dv_meta_blit_pipeline {
|
||||
|
||||
#define V3DV_META_BLIT_CACHE_KEY_SIZE (3 * sizeof(uint32_t))
|
||||
|
||||
/* Debug counters for one cache kind inside a pipeline cache. */
struct v3dv_pipeline_cache_stats {
   /* Searches that did not produce a usable entry. */
   uint32_t miss;
   /* Searches that returned a cached entry. */
   uint32_t hit;
   /* Entries added to the cache. */
   uint32_t count;
};
|
||||
|
||||
struct v3dv_pipeline_cache {
|
||||
VK_LOADER_DATA _loader_data;
|
||||
|
||||
struct v3dv_device *device;
|
||||
mtx_t mutex;
|
||||
|
||||
struct hash_table *nir_cache;
|
||||
struct v3dv_pipeline_cache_stats nir_stats;
|
||||
};
|
||||
|
||||
struct v3dv_device {
|
||||
@@ -1245,6 +1256,9 @@ struct v3dv_pipeline_stage {
|
||||
|
||||
nir_shader *nir;
|
||||
|
||||
/* SHA-1 identifying this shader: the combined hash of the module sha1, the entrypoint name, the stage and the spec_info (see pipeline_hash_shader) */
|
||||
unsigned char shader_sha1[20];
|
||||
|
||||
/** A name for this program, so you can track it in shader-db output. */
|
||||
uint32_t program_id;
|
||||
/** How many variants of this program were compiled, for shader-db. */
|
||||
@@ -1739,6 +1753,17 @@ v3dv_immutable_samplers(const struct v3dv_descriptor_set_layout *set,
|
||||
return (const struct v3dv_sampler *) ((const char *) set + binding->immutable_samplers_offset);
|
||||
}
|
||||
|
||||
void v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
|
||||
struct v3dv_pipeline_cache *cache,
|
||||
nir_shader *nir,
|
||||
unsigned char sha1_key[20]);
|
||||
|
||||
nir_shader* v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
|
||||
struct v3dv_pipeline_cache *cache,
|
||||
const nir_shader_compiler_options *nir_options,
|
||||
unsigned char sha1_key[20]);
|
||||
|
||||
|
||||
#define V3DV_DEFINE_HANDLE_CASTS(__v3dv_type, __VkType) \
|
||||
\
|
||||
static inline struct __v3dv_type * \
|
||||
|
||||
Reference in New Issue
Block a user