diff --git a/src/gallium/drivers/zink/zink_program.c b/src/gallium/drivers/zink/zink_program.c index 7e211ac29cb..8a688db10f1 100644 --- a/src/gallium/drivers/zink/zink_program.c +++ b/src/gallium/drivers/zink/zink_program.c @@ -131,7 +131,10 @@ create_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *scr const struct zink_shader_key *key = &state->shader_keys.key[stage]; /* non-generated tcs won't use the shader key */ const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated; - zm = malloc(sizeof(struct zink_shader_module) + key->size + (!has_nonseamless ? nonseamless_size : 0) + inline_size * sizeof(uint32_t)); + const bool shadow_needs_shader_swizzle = stage == MESA_SHADER_FRAGMENT && key->key.fs.base.shadow_needs_shader_swizzle; + zm = malloc(sizeof(struct zink_shader_module) + key->size + + (!has_nonseamless ? nonseamless_size : 0) + inline_size * sizeof(uint32_t) + + (shadow_needs_shader_swizzle ? sizeof(struct zink_fs_shadow_key) : 0)); if (!zm) { return NULL; } @@ -166,6 +169,10 @@ create_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *scr zm->hash = patch_vertices; else zm->hash = shader_module_hash(zm); + if (unlikely(shadow_needs_shader_swizzle)) { + memcpy(zm->key + key->size + nonseamless_size + inline_size * sizeof(uint32_t), &ctx->di.shadow, sizeof(struct zink_fs_shadow_key)); + zm->hash ^= _mesa_hash_data(&ctx->di.shadow, sizeof(struct zink_fs_shadow_key)); + } zm->default_variant = !inline_size && !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*); if (inline_size) prog->inlined_variant_count[stage]++; @@ -185,6 +192,7 @@ get_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen const struct zink_shader_key *key = &state->shader_keys.key[stage]; /* non-generated tcs won't use the shader key */ const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated; + const bool shadow_needs_shader_swizzle = stage == MESA_SHADER_FRAGMENT && unlikely(key->key.fs.base.shadow_needs_shader_swizzle); struct util_dynarray *shader_cache = &prog->shader_cache[stage][!has_nonseamless ? !!nonseamless_size : 0][has_inline ? !!inline_size : 0]; unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *); @@ -199,6 +207,12 @@ get_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen continue; if (!shader_key_matches(iter, key, inline_size, has_inline, has_nonseamless)) continue; + if (unlikely(shadow_needs_shader_swizzle)) { + /* shadow swizzle data needs a manual compare since it's so fat */ + if (memcmp(iter->key + iter->key_size + nonseamless_size + iter->num_uniforms * sizeof(uint32_t), + &ctx->di.shadow, sizeof(struct zink_fs_shadow_key))) + continue; + } } if (i > 0) { struct zink_shader_module *zero = pzm[0]; @@ -221,17 +235,19 @@ create_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_scr struct zink_shader_module *zm; uint16_t *key; unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8); + bool shadow_needs_shader_swizzle = false; if (zs == prog->last_vertex_stage) { key = (uint16_t*)&state->shader_keys_optimal.key.vs_base; } else if (stage == MESA_SHADER_FRAGMENT) { key = (uint16_t*)&state->shader_keys_optimal.key.fs; + shadow_needs_shader_swizzle = ctx ? ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle : false; } else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) { key = (uint16_t*)&state->shader_keys_optimal.key.tcs; } else { key = NULL; } size_t key_size = sizeof(uint16_t); - zm = calloc(1, sizeof(struct zink_shader_module) + (key ? key_size : 0)); + zm = calloc(1, sizeof(struct zink_shader_module) + (key ? key_size : 0) + (unlikely(shadow_needs_shader_swizzle) ? sizeof(struct zink_fs_shadow_key) : 0)); if (!zm) { return NULL; } @@ -254,6 +270,8 @@ create_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_scr uint16_t *data = (uint16_t*)zm->key; /* sanitize actual key bits */ *data = (*key) & mask; + if (unlikely(shadow_needs_shader_swizzle)) + memcpy(&data[1], &ctx->di.shadow, sizeof(struct zink_fs_shadow_key)); } zm->default_variant = !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*); util_dynarray_append(&prog->shader_cache[stage][0][0], void*, zm); @@ -268,12 +286,14 @@ get_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen { /* non-generated tcs won't use the shader key */ const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated; + bool shadow_needs_shader_swizzle = false; uint16_t *key; unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8); if (zs == prog->last_vertex_stage) { key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.vs_base; } else if (stage == MESA_SHADER_FRAGMENT) { key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.fs; + shadow_needs_shader_swizzle = ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle; } else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) { key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs; } else { @@ -291,6 +311,11 @@ get_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen /* no key is bigger than uint16_t */ if (memcmp(iter->key, &val, sizeof(uint16_t))) continue; + if (unlikely(shadow_needs_shader_swizzle)) { + /* shadow swizzle data needs a manual compare since it's so fat */ + if (memcmp(iter->key + sizeof(uint16_t), &ctx->di.shadow, sizeof(struct zink_fs_shadow_key))) + continue; + } } if (i > 0) { struct zink_shader_module *zero = pzm[0]; @@ -603,9 +628,16 @@ update_gfx_program_optimal(struct zink_context *ctx, struct zink_gfx_program *pr bool changed = update_gfx_shader_module_optimal(ctx, prog, ctx->last_vertex_stage->nir->info.stage); ctx->gfx_pipeline_state.modules_changed |= changed; } - if (ctx->gfx_pipeline_state.shader_keys_optimal.key.fs_bits != optimal_key->fs_bits) { + const bool shadow_needs_shader_swizzle = optimal_key->fs.shadow_needs_shader_swizzle && (ctx->dirty_gfx_stages & BITFIELD_BIT(MESA_SHADER_FRAGMENT)); + if (ctx->gfx_pipeline_state.shader_keys_optimal.key.fs_bits != optimal_key->fs_bits || + /* always recheck shadow swizzles since they aren't directly part of the key */ + unlikely(shadow_needs_shader_swizzle)) { bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_FRAGMENT); ctx->gfx_pipeline_state.modules_changed |= changed; + if (unlikely(shadow_needs_shader_swizzle)) { + struct zink_shader_module **pzm = prog->shader_cache[MESA_SHADER_FRAGMENT][0][0].data; + ctx->gfx_pipeline_state.shadow = (struct zink_fs_shadow_key*)pzm[0]->key + sizeof(uint16_t); + } } if (prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated && ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs_bits != optimal_key->tcs_bits) { diff --git a/src/gallium/drivers/zink/zink_program_state.hpp b/src/gallium/drivers/zink/zink_program_state.hpp index 3c8322efac5..0e78e9ea9c1 100644 --- a/src/gallium/drivers/zink/zink_program_state.hpp +++ b/src/gallium/drivers/zink/zink_program_state.hpp @@ -362,6 +362,10 @@ equals_gfx_pipeline_state(const void *a, const void *b) if (STAGE_MASK & STAGE_MASK_OPTIMAL) { if (sa->optimal_key != sb->optimal_key) return false; + if (STAGE_MASK & STAGE_MASK_OPTIMAL_SHADOW) { + if (sa->shadow != sb->shadow) + return false; + } } else { if (STAGE_MASK & BITFIELD_BIT(MESA_SHADER_TESS_CTRL)) { if (sa->modules[MESA_SHADER_TESS_CTRL] != sb->modules[MESA_SHADER_TESS_CTRL]) @@ -387,10 +391,13 @@ equals_gfx_pipeline_state(const void *a, const void *b) /* below is a bunch of code to pick the right equals_gfx_pipeline_state template for runtime */ template static equals_gfx_pipeline_state_func -get_optimal_gfx_pipeline_stage_eq_func(bool optimal_keys) +get_optimal_gfx_pipeline_stage_eq_func(bool optimal_keys, bool shadow_needs_shader_swizzle) { - if (optimal_keys) + if (optimal_keys) { + if (shadow_needs_shader_swizzle) + return equals_gfx_pipeline_state; return equals_gfx_pipeline_state; + } return equals_gfx_pipeline_state; } @@ -398,6 +405,7 @@ template static equals_gfx_pipeline_state_func get_gfx_pipeline_stage_eq_func(struct zink_gfx_program *prog, bool optimal_keys) { + bool shadow_needs_shader_swizzle = prog->shaders[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask > 0; unsigned vertex_stages = prog->stages_present & BITFIELD_MASK(MESA_SHADER_FRAGMENT); if (vertex_stages & BITFIELD_BIT(MESA_SHADER_TESS_CTRL)) { if (prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated) @@ -407,30 +415,30 @@ get_gfx_pipeline_stage_eq_func(struct zink_gfx_program *prog, bool optimal_keys) if (vertex_stages == BITFIELD_MASK(MESA_SHADER_FRAGMENT)) /* all stages */ return get_optimal_gfx_pipeline_stage_eq_func(optimal_keys); + BITFIELD_MASK(MESA_SHADER_COMPUTE)>(optimal_keys, shadow_needs_shader_swizzle); if (vertex_stages == BITFIELD_MASK(MESA_SHADER_GEOMETRY)) /* tess only: includes generated tcs too */ return get_optimal_gfx_pipeline_stage_eq_func(optimal_keys); + BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys, shadow_needs_shader_swizzle); if (vertex_stages == (BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_GEOMETRY))) /* geom only */ return get_optimal_gfx_pipeline_stage_eq_func(optimal_keys); + BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys, shadow_needs_shader_swizzle); } if (vertex_stages == (BITFIELD_MASK(MESA_SHADER_FRAGMENT) & ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL))) /* all stages but tcs */ return get_optimal_gfx_pipeline_stage_eq_func(optimal_keys); + BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL)>(optimal_keys, shadow_needs_shader_swizzle); if (vertex_stages == (BITFIELD_MASK(MESA_SHADER_GEOMETRY) & ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL))) /* tess only: generated tcs */ return get_optimal_gfx_pipeline_stage_eq_func(optimal_keys); + BITFIELD_MASK(MESA_SHADER_COMPUTE) & ~(BITFIELD_BIT(MESA_SHADER_GEOMETRY) | BITFIELD_BIT(MESA_SHADER_TESS_CTRL))>(optimal_keys, shadow_needs_shader_swizzle); if (vertex_stages == (BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_GEOMETRY))) /* geom only */ return get_optimal_gfx_pipeline_stage_eq_func(optimal_keys); + BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)>(optimal_keys, shadow_needs_shader_swizzle); return get_optimal_gfx_pipeline_stage_eq_func(optimal_keys); + BITFIELD_BIT(MESA_SHADER_VERTEX) | BITFIELD_BIT(MESA_SHADER_FRAGMENT)>(optimal_keys, shadow_needs_shader_swizzle); } equals_gfx_pipeline_state_func diff --git a/src/gallium/drivers/zink/zink_types.h b/src/gallium/drivers/zink/zink_types.h index 2b493f82cda..616983b23f4 100644 --- a/src/gallium/drivers/zink/zink_types.h +++ b/src/gallium/drivers/zink/zink_types.h @@ -797,6 +797,7 @@ struct zink_gfx_pipeline_state { uint32_t vertex_buffers_enabled_mask; uint32_t vertex_strides[PIPE_MAX_ATTRIBS]; struct zink_vertex_elements_hw_state *element_state; + struct zink_fs_shadow_key *shadow; bool sample_locations_enabled; enum pipe_prim_type shader_rast_prim, rast_prim; /* reduced type or max for unknown */ union { @@ -875,7 +876,7 @@ struct zink_shader_module { bool has_nonseamless; uint8_t num_uniforms; uint8_t key_size; - uint8_t key[0]; /* | key | uniforms | */ + uint8_t key[0]; /* | key | uniforms | shadow swizzle | */ }; struct zink_program { @@ -900,6 +901,7 @@ struct zink_program { }; #define STAGE_MASK_OPTIMAL (1<<16) +#define STAGE_MASK_OPTIMAL_SHADOW (1<<17) typedef bool (*equals_gfx_pipeline_state_func)(const void *a, const void *b); struct zink_gfx_library_key {