From 487ac6dbd600ada6124caab685f345fa437b1e4f Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Thu, 9 Feb 2023 12:17:25 -0500 Subject: [PATCH] zink: implement cross-program pipeline library sharing. some games/apps (e.g., DOOM2016) compile+link shaders in one context and then use them in another, expecting that the compiled shaders will be reused. vulkan has pipeline (library) objects, which are not specific to shaders but are in theory representing the shaders being used. thus, pipeline (library) objects need to be reusable for any case where a shader can be reused. to handle this: * extract pipeline library cache to a refcounted object * store these objects on the screen * make them owned by shaders. separable programs are slightly different since they'll use their own fastpath, thus making their library caches owned by the programs to avoid polluting the optimized caches. fixes #8264 Part-of: --- src/gallium/drivers/zink/zink_compiler.c | 12 +++++++ src/gallium/drivers/zink/zink_draw.cpp | 15 ++++++++ src/gallium/drivers/zink/zink_program.c | 45 +++++++++++++++++++++--- src/gallium/drivers/zink/zink_screen.c | 7 ++++ src/gallium/drivers/zink/zink_screen.h | 3 ++ src/gallium/drivers/zink/zink_types.h | 10 ++++++ 6 files changed, 87 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index 2352fe17aa4..73b87f1f1f2 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -4231,6 +4231,7 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir, ret->sinfo.have_vulkan_memory_model = screen->info.have_KHR_vulkan_memory_model; util_queue_fence_init(&ret->precompile.fence); + util_dynarray_init(&ret->pipeline_libs, ret); ret->hash = _mesa_hash_pointer(ret); ret->programs = _mesa_pointer_set_create(NULL); @@ -4500,6 +4501,17 @@ zink_shader_free(struct zink_screen *screen, struct zink_shader *shader) prog->base.removed = true; 
simple_mtx_unlock(&prog->ctx->program_lock[idx]); util_queue_fence_wait(&prog->base.cache_fence); + + while (util_dynarray_contains(&shader->pipeline_libs, struct zink_gfx_lib_cache*)) { + struct zink_gfx_lib_cache *libs = util_dynarray_pop(&shader->pipeline_libs, struct zink_gfx_lib_cache*); + if (!libs->removed) { + libs->removed = true; + simple_mtx_lock(&screen->pipeline_libs_lock[idx]); + _mesa_set_remove_key(&screen->pipeline_libs[idx], libs); + simple_mtx_unlock(&screen->pipeline_libs_lock[idx]); + } + zink_gfx_lib_cache_unref(screen, libs); + } } if (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated) { prog->shaders[stage] = NULL; diff --git a/src/gallium/drivers/zink/zink_draw.cpp b/src/gallium/drivers/zink/zink_draw.cpp index dd099db85b0..368ab25a900 100644 --- a/src/gallium/drivers/zink/zink_draw.cpp +++ b/src/gallium/drivers/zink/zink_draw.cpp @@ -1215,3 +1215,18 @@ zink_init_grid_functions(struct zink_context *ctx) */ ctx->base.launch_grid = zink_invalid_launch_grid; } + +void +zink_init_screen_pipeline_libs(struct zink_screen *screen) +{ + _mesa_set_init(&screen->pipeline_libs[0], screen, hash_gfx_program<0>, equals_gfx_program<0>); + _mesa_set_init(&screen->pipeline_libs[1], screen, hash_gfx_program<1>, equals_gfx_program<1>); + _mesa_set_init(&screen->pipeline_libs[2], screen, hash_gfx_program<2>, equals_gfx_program<2>); + _mesa_set_init(&screen->pipeline_libs[3], screen, hash_gfx_program<3>, equals_gfx_program<3>); + _mesa_set_init(&screen->pipeline_libs[4], screen, hash_gfx_program<4>, equals_gfx_program<4>); + _mesa_set_init(&screen->pipeline_libs[5], screen, hash_gfx_program<5>, equals_gfx_program<5>); + _mesa_set_init(&screen->pipeline_libs[6], screen, hash_gfx_program<6>, equals_gfx_program<6>); + _mesa_set_init(&screen->pipeline_libs[7], screen, hash_gfx_program<7>, equals_gfx_program<7>); + for (unsigned i = 0; i < ARRAY_SIZE(screen->pipeline_libs_lock); i++) + simple_mtx_init(&screen->pipeline_libs_lock[i], mtx_plain); +} \ No 
newline at end of file diff --git a/src/gallium/drivers/zink/zink_program.c b/src/gallium/drivers/zink/zink_program.c index fd0dc0301eb..be7d841f2bc 100644 --- a/src/gallium/drivers/zink/zink_program.c +++ b/src/gallium/drivers/zink/zink_program.c @@ -957,6 +957,42 @@ create_lib_cache(struct zink_gfx_program *prog, bool generated_tcs) return libs; } +static struct zink_gfx_lib_cache * +find_or_create_lib_cache(struct zink_screen *screen, struct zink_gfx_program *prog) +{ + unsigned stages_present = prog->stages_present; + bool generated_tcs = prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated; + if (generated_tcs) + stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL); + unsigned idx = zink_program_cache_stages(stages_present); + struct set *ht = &screen->pipeline_libs[idx]; + const uint32_t hash = prog->gfx_hash; + + simple_mtx_lock(&screen->pipeline_libs_lock[idx]); + bool found = false; + struct set_entry *entry = _mesa_set_search_or_add_pre_hashed(ht, hash, prog->shaders, &found); + struct zink_gfx_lib_cache *libs; + if (found) { + libs = (void*)entry->key; + } else { + libs = create_lib_cache(prog, generated_tcs); + memcpy(libs->shaders, prog->shaders, sizeof(prog->shaders)); + entry->key = libs; + unsigned refs = 0; + for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) { + if (prog->shaders[i] && (!generated_tcs || i != MESA_SHADER_TESS_CTRL)) { + simple_mtx_lock(&prog->shaders[i]->lock); + util_dynarray_append(&prog->shaders[i]->pipeline_libs, struct zink_gfx_lib_cache*, libs); + simple_mtx_unlock(&prog->shaders[i]->lock); + refs++; + } + } + p_atomic_set(&libs->refcount, refs); + } + simple_mtx_unlock(&screen->pipeline_libs_lock[idx]); + return libs; +} + struct zink_gfx_program * zink_create_gfx_program(struct zink_context *ctx, struct zink_shader **stages, @@ -981,13 +1017,11 @@ zink_create_gfx_program(struct zink_context *ctx, prog->stages_present |= BITFIELD_BIT(i); } } - bool generated_tcs = false; if 
(stages[MESA_SHADER_TESS_EVAL] && !stages[MESA_SHADER_TESS_CTRL]) { prog->shaders[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs = prog->shaders[MESA_SHADER_TESS_CTRL] = zink_shader_tcs_create(screen, stages[MESA_SHADER_VERTEX], vertices_per_patch); prog->stages_present |= BITFIELD_BIT(MESA_SHADER_TESS_CTRL); - generated_tcs = true; } prog->stages_remaining = prog->stages_present; @@ -1010,8 +1044,7 @@ zink_create_gfx_program(struct zink_context *ctx, } } - prog->libs = create_lib_cache(prog, generated_tcs); - p_atomic_set(&prog->libs, 1); + prog->libs = find_or_create_lib_cache(screen, prog); struct mesa_sha1 sctx; _mesa_sha1_init(&sctx); @@ -1077,6 +1110,7 @@ create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stag prog->shaders[MESA_SHADER_FRAGMENT] = stages[MESA_SHADER_FRAGMENT]; prog->last_vertex_stage = stages[MESA_SHADER_VERTEX]; prog->libs = create_lib_cache(prog, false); + /* this libs cache is owned by the program */ p_atomic_set(&prog->libs->refcount, 1); unsigned refs = 0; @@ -1409,7 +1443,8 @@ zink_destroy_gfx_program(struct zink_screen *screen, ralloc_free(prog->nir[i]); } } - zink_gfx_lib_cache_unref(screen, prog->libs); + if (prog->is_separable) + zink_gfx_lib_cache_unref(screen, prog->libs); ralloc_free(prog); } diff --git a/src/gallium/drivers/zink/zink_screen.c b/src/gallium/drivers/zink/zink_screen.c index 9f7f4488c52..98abc2417e9 100644 --- a/src/gallium/drivers/zink/zink_screen.c +++ b/src/gallium/drivers/zink/zink_screen.c @@ -1435,6 +1435,12 @@ zink_destroy_screen(struct pipe_screen *pscreen) } #endif disk_cache_destroy(screen->disk_cache); + + for (unsigned i = 0; i < ARRAY_SIZE(screen->pipeline_libs); i++) + _mesa_set_clear(&screen->pipeline_libs[i], NULL); + for (unsigned i = 0; i < ARRAY_SIZE(screen->pipeline_libs_lock); i++) + simple_mtx_destroy(&screen->pipeline_libs_lock[i]); + zink_bo_deinit(screen); util_live_shader_cache_deinit(&screen->shaders); @@ -2938,6 +2944,7 @@ zink_internal_create_screen(const struct 
pipe_screen_config *config) screen->buffer_barrier = zink_resource_buffer_barrier; } + zink_init_screen_pipeline_libs(screen); if (!init_layouts(screen)) goto fail; diff --git a/src/gallium/drivers/zink/zink_screen.h b/src/gallium/drivers/zink/zink_screen.h index 1eed13c197c..d65f50ac88d 100644 --- a/src/gallium/drivers/zink/zink_screen.h +++ b/src/gallium/drivers/zink/zink_screen.h @@ -34,6 +34,9 @@ extern "C" { extern uint32_t zink_debug; struct util_dl_library; +void +zink_init_screen_pipeline_libs(struct zink_screen *screen); + /* update last_finished to account for batch_id wrapping */ static inline void diff --git a/src/gallium/drivers/zink/zink_types.h b/src/gallium/drivers/zink/zink_types.h index 35c2d78919f..0c81b9eee40 100644 --- a/src/gallium/drivers/zink/zink_types.h +++ b/src/gallium/drivers/zink/zink_types.h @@ -747,6 +747,7 @@ struct zink_shader { simple_mtx_t lock; struct set *programs; + struct util_dynarray pipeline_libs; union { struct { @@ -984,7 +985,10 @@ struct zink_gfx_pipeline_cache_entry { }; struct zink_gfx_lib_cache { + /* for hashing */ + struct zink_shader *shaders[ZINK_GFX_SHADER_COUNT]; unsigned refcount; + bool removed; //once removed from cache simple_mtx_t lock; struct set libs; //zink_gfx_library_key -> VkPipeline @@ -1285,6 +1289,12 @@ struct zink_screen { struct util_queue cache_put_thread; struct util_queue cache_get_thread; + /* there are 5 gfx stages, but VS and FS are assumed to be always present, + * thus only 3 stages need to be considered, giving 2^3 = 8 program caches. + */ + struct set pipeline_libs[8]; + simple_mtx_t pipeline_libs_lock[8]; + simple_mtx_t desc_set_layouts_lock; struct hash_table desc_set_layouts[ZINK_DESCRIPTOR_BASE_TYPES]; simple_mtx_t desc_pool_keys_lock;