diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index 02f44f72a76..40ca2c62a20 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -524,69 +524,155 @@ update_so_info(struct zink_shader *zs, const struct pipe_stream_output_info *so_ } static void -assign_io_locations(nir_shader *nir, unsigned char *shader_slot_map, - unsigned char *shader_slots_reserved) +assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map) /* Sets var->data.driver_location for one variable. The builtin slots listed in the switch get the UINT_MAX sentinel; patch variables use location - VARYING_SLOT_PATCH0; TESS_EVAL inputs at or above VARYING_SLOT_VAR0 use location - VARYING_SLOT_VAR0; every other location takes its entry in slot_map, claiming a new one at *reserved (advanced by glsl_count_vec4_slots) when the entry is still 0xff. */ { - unsigned reserved = shader_slots_reserved ? *shader_slots_reserved : 0; - nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) { - if ((nir->info.stage == MESA_SHADER_VERTEX && var->data.mode == nir_var_shader_in) || - (nir->info.stage == MESA_SHADER_FRAGMENT && var->data.mode == nir_var_shader_out)) - continue; + unsigned slot = var->data.location; + switch (var->data.location) { + case VARYING_SLOT_POS: + case VARYING_SLOT_PNTC: + case VARYING_SLOT_PSIZ: + case VARYING_SLOT_LAYER: + case VARYING_SLOT_PRIMITIVE_ID: + case VARYING_SLOT_CLIP_DIST0: + case VARYING_SLOT_CULL_DIST0: + case VARYING_SLOT_VIEWPORT: + case VARYING_SLOT_FACE: + case VARYING_SLOT_TESS_LEVEL_OUTER: + case VARYING_SLOT_TESS_LEVEL_INNER: + /* use a sentinel value to avoid counting later */ + var->data.driver_location = UINT_MAX; + break; - unsigned slot = var->data.location; - switch (var->data.location) { - case VARYING_SLOT_POS: - case VARYING_SLOT_PNTC: - case VARYING_SLOT_PSIZ: - case VARYING_SLOT_LAYER: - case VARYING_SLOT_PRIMITIVE_ID: - case VARYING_SLOT_CLIP_DIST0: - case VARYING_SLOT_CULL_DIST0: - case VARYING_SLOT_VIEWPORT: - case VARYING_SLOT_FACE: - case VARYING_SLOT_TESS_LEVEL_OUTER: - case VARYING_SLOT_TESS_LEVEL_INNER: - /* use a sentinel value to avoid counting later */ - var->data.driver_location = UINT_MAX; - break; - - default: - if (var->data.patch) { - assert(var->data.location >= 
VARYING_SLOT_PATCH0); - slot = var->data.location - VARYING_SLOT_PATCH0; - } else if (var->data.location >= VARYING_SLOT_VAR0 && - ((var->data.mode == nir_var_shader_out && - nir->info.stage == MESA_SHADER_TESS_CTRL) || - (var->data.mode != nir_var_shader_out && - nir->info.stage == MESA_SHADER_TESS_EVAL))) { - slot = var->data.location - VARYING_SLOT_VAR0; - } else { - if (shader_slot_map[var->data.location] == 0xff) { - assert(reserved < MAX_VARYING); - shader_slot_map[var->data.location] = reserved; - if (nir->info.stage == MESA_SHADER_TESS_CTRL && var->data.location >= VARYING_SLOT_VAR0) - reserved += (glsl_count_vec4_slots(var->type, false, false) / 32 /*MAX_PATCH_VERTICES*/); - else - reserved += glsl_count_vec4_slots(var->type, false, false); - } - slot = shader_slot_map[var->data.location]; - assert(slot < MAX_VARYING); + default: + if (var->data.patch) { + assert(var->data.location >= VARYING_SLOT_PATCH0); + slot = var->data.location - VARYING_SLOT_PATCH0; + } else if (var->data.location >= VARYING_SLOT_VAR0 && + var->data.mode == nir_var_shader_in && + stage == MESA_SHADER_TESS_EVAL) { + slot = var->data.location - VARYING_SLOT_VAR0; + } else { + if (slot_map[var->data.location] == 0xff) { + assert(*reserved < MAX_VARYING); + slot_map[var->data.location] = *reserved; + *reserved += glsl_count_vec4_slots(var->type, false, false); } - var->data.driver_location = slot; + slot = slot_map[var->data.location]; + assert(slot < MAX_VARYING); + } + var->data.driver_location = slot; + } +} + +/* True only when stage is MESA_SHADER_FRAGMENT and the variable's location is within VARYING_SLOT_TEX0..VARYING_SLOT_TEX7. */ ALWAYS_INLINE static bool +is_texcoord(gl_shader_stage stage, const nir_variable *var) +{ + if (stage != MESA_SHADER_FRAGMENT) + return false; + return var->data.location >= VARYING_SLOT_TEX0 && + var->data.location <= VARYING_SLOT_TEX7; +} + +/* Sets var->data.driver_location for one variable by looking up slot_map (an unset entry is only claimed, at *reserved, when is_texcoord() is true). Returns false without assigning a location when the variable's slot_map entry is unset and is_texcoord() is false; returns true in every other case. */ static bool +assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map) +{ + switch (var->data.location) { + case VARYING_SLOT_POS: + case VARYING_SLOT_PNTC: + case VARYING_SLOT_PSIZ: + 
case VARYING_SLOT_LAYER: + case VARYING_SLOT_PRIMITIVE_ID: + case VARYING_SLOT_CLIP_DIST0: + case VARYING_SLOT_CULL_DIST0: + case VARYING_SLOT_VIEWPORT: + case VARYING_SLOT_FACE: + case VARYING_SLOT_TESS_LEVEL_OUTER: + case VARYING_SLOT_TESS_LEVEL_INNER: + /* use a sentinel value to avoid counting later */ + var->data.driver_location = UINT_MAX; + break; + default: + if (var->data.patch) { + assert(var->data.location >= VARYING_SLOT_PATCH0); + var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0; + } else if (var->data.location >= VARYING_SLOT_VAR0 && + stage == MESA_SHADER_TESS_CTRL && + var->data.mode == nir_var_shader_out) + var->data.driver_location = var->data.location - VARYING_SLOT_VAR0; + else { + if (slot_map[var->data.location] == (unsigned char)-1) { + if (!is_texcoord(stage, var)) + /* dead io */ + return false; + /* texcoords can't be eliminated in fs due to GL_COORD_REPLACE */ + slot_map[var->data.location] = (*reserved)++; + } + var->data.driver_location = slot_map[var->data.location]; } } + return true; +} - if (shader_slots_reserved) - *shader_slots_reserved = reserved; + +/* Callback passed to nir_shader_instructions_pass; data is the nir_variable to discard. For a load_deref intrinsic whose variable is data, replaces every use of the loaded value with an undef of the same component count and bit size and returns true; returns false for any other instruction. */ static bool +rewrite_and_discard_read(nir_builder *b, nir_instr *instr, void *data) +{ + nir_variable *var = data; + if (instr->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic != nir_intrinsic_load_deref) + return false; + nir_variable *deref_var = nir_intrinsic_get_var(intr, 0); + if (deref_var != var) + return false; + nir_ssa_def *undef = nir_ssa_undef(b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest)); + nir_ssa_def_rewrite_uses(&intr->dest.ssa, undef); + return true; +} + +/* Assigns driver locations for the shader_out variables of producer and the shader_in variables of consumer, starting from a slot_map filled with 0xff bytes and reserved = 0. When producer is a TESS_CTRL shader the roles are swapped: consumer inputs go through assign_producer_var_io first, then producer outputs through assign_consumer_var_io. If any assign_consumer_var_io call returned false, the shader held in nir gets nir_fixup_deref_modes, nir_remove_dead_variables(nir_var_shader_temp) and optimize_nir. */ void +zink_compiler_assign_io(nir_shader *producer, nir_shader *consumer) +{ + unsigned reserved = 0; + unsigned char slot_map[VARYING_SLOT_MAX]; + memset(slot_map, -1, sizeof(slot_map)); + bool do_fixup = false; + nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? 
producer : consumer; + if (producer->info.stage == MESA_SHADER_TESS_CTRL) { + /* never assign from tcs -> tes, always invert */ + nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in) + assign_producer_var_io(consumer->info.stage, var, &reserved, slot_map); + nir_foreach_variable_with_modes_safe(var, producer, nir_var_shader_out) { + if (!assign_consumer_var_io(producer->info.stage, var, &reserved, slot_map)) + /* this is an output, nothing more needs to be done for it to be dropped */ + do_fixup = true; + } + } else { + nir_foreach_variable_with_modes(var, producer, nir_var_shader_out) + assign_producer_var_io(producer->info.stage, var, &reserved, slot_map); + nir_foreach_variable_with_modes_safe(var, consumer, nir_var_shader_in) { + if (!assign_consumer_var_io(consumer->info.stage, var, &reserved, slot_map)) { + do_fixup = true; + /* input needs to be rewritten as an undef to ensure the entire deref chain is deleted */ + nir_shader_instructions_pass(consumer, rewrite_and_discard_read, nir_metadata_dominance, var); + } + } + } + if (!do_fixup) + return; + nir_fixup_deref_modes(nir); + NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL); + optimize_nir(nir); } VkShaderModule -zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, struct zink_shader_key *key, - unsigned char *shader_slot_map, unsigned char *shader_slots_reserved) +zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shader *base_nir, struct zink_shader_key *key) { VkShaderModule mod = VK_NULL_HANDLE; void *streamout = NULL; - nir_shader *nir = nir_shader_clone(NULL, zs->nir); + nir_shader *nir = nir_shader_clone(NULL, base_nir); if (key) { if (key->inline_uniforms) { @@ -640,8 +726,6 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, struct z } NIR_PASS_V(nir, nir_convert_from_ssa, true); - assign_io_locations(nir, shader_slot_map, shader_slots_reserved); - struct spirv_shader *spirv = nir_to_spirv(nir, 
streamout, screen->spirv_version); if (!spirv) goto done; diff --git a/src/gallium/drivers/zink/zink_compiler.h b/src/gallium/drivers/zink/zink_compiler.h index 7191f3a8412..4f6a4ddfa77 100644 --- a/src/gallium/drivers/zink/zink_compiler.h +++ b/src/gallium/drivers/zink/zink_compiler.h @@ -93,10 +93,10 @@ struct zink_shader { void zink_screen_init_compiler(struct zink_screen *screen); - +void +zink_compiler_assign_io(nir_shader *producer, nir_shader *consumer); VkShaderModule -zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, struct zink_shader_key *key, - unsigned char *shader_slot_map, unsigned char *shader_slots_reserved); +zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, struct zink_shader_key *key); struct zink_shader * zink_shader_create(struct zink_screen *screen, struct nir_shader *nir, diff --git a/src/gallium/drivers/zink/zink_program.c b/src/gallium/drivers/zink/zink_program.c index f22481093a7..9fd07a4010d 100644 --- a/src/gallium/drivers/zink/zink_program.c +++ b/src/gallium/drivers/zink/zink_program.c @@ -249,8 +249,7 @@ get_shader_module_for_stage(struct zink_context *ctx, struct zink_shader *zs, st return NULL; } pipe_reference_init(&zm->reference, 1); - mod = zink_shader_compile(zink_screen(ctx->base.screen), zs, &key, - prog->shader_slot_map, &prog->shader_slots_reserved); + mod = zink_shader_compile(zink_screen(ctx->base.screen), zs, prog->nir[stage], &key); if (!mod) { ralloc_free(keybox); FREE(zm); @@ -370,82 +369,6 @@ equals_gfx_pipeline_state(const void *a, const void *b) !memcmp(a, b, offsetof(struct zink_gfx_pipeline_state, hash)); } -static void -init_slot_map(struct zink_context *ctx, struct zink_gfx_program *prog) -{ - unsigned existing_shaders = 0; - bool needs_new_map = false; - - /* if there's a case where we'll be reusing any shaders, we need to (maybe) reuse the slot map too */ - if (ctx->curr_program) { - for (int i = 0; i < ZINK_SHADER_COUNT; ++i) { - if 
(ctx->curr_program->shaders[i]) - existing_shaders |= 1 << i; - } - /* if there's reserved slots, check whether we have enough remaining slots */ - if (ctx->curr_program->shader_slots_reserved) { - uint64_t max_outputs = 0; - uint32_t num_xfb_outputs = 0; - for (int i = 0; i < ZINK_SHADER_COUNT; ++i) { - if (i != PIPE_SHADER_TESS_CTRL && - i != PIPE_SHADER_FRAGMENT && - ctx->gfx_stages[i]) { - uint32_t user_outputs = ctx->gfx_stages[i]->nir->info.outputs_written >> 32; - uint32_t builtin_outputs = ctx->gfx_stages[i]->nir->info.outputs_written; - num_xfb_outputs = MAX2(num_xfb_outputs, ctx->gfx_stages[i]->streamout.so_info.num_outputs); - unsigned user_outputs_count = 0; - /* check builtins first */ - u_foreach_bit(slot, builtin_outputs) { - switch (slot) { - /* none of these require slot map entries */ - case VARYING_SLOT_POS: - case VARYING_SLOT_PSIZ: - case VARYING_SLOT_LAYER: - case VARYING_SLOT_PRIMITIVE_ID: - case VARYING_SLOT_CULL_DIST0: - case VARYING_SLOT_CLIP_DIST0: - case VARYING_SLOT_VIEWPORT: - case VARYING_SLOT_TESS_LEVEL_INNER: - case VARYING_SLOT_TESS_LEVEL_OUTER: - break; - default: - /* remaining legacy builtins only require 1 slot each */ - if (ctx->curr_program->shader_slot_map[slot] == -1) - user_outputs_count++; - break; - } - } - u_foreach_bit(slot, user_outputs) { - if (ctx->curr_program->shader_slot_map[slot] == -1) { - /* user variables can span multiple slots */ - nir_variable *var = nir_find_variable_with_location(ctx->gfx_stages[i]->nir, - nir_var_shader_out, slot); - assert(var); - if (i == PIPE_SHADER_TESS_CTRL && var->data.location >= VARYING_SLOT_VAR0) - user_outputs_count += (glsl_count_vec4_slots(var->type, false, false) / 32 /*MAX_PATCH_VERTICES*/); - else - user_outputs_count += glsl_count_vec4_slots(var->type, false, false); - } - } - max_outputs = MAX2(max_outputs, user_outputs_count); - } - } - /* slot map can only hold 32 entries, so dump this one if we'll exceed that */ - if (ctx->curr_program->shader_slots_reserved + 
max_outputs + num_xfb_outputs > 32) - needs_new_map = true; - } - } - - if (needs_new_map || ctx->dirty_shader_stages == existing_shaders || !existing_shaders) { - /* all shaders are being recompiled: new slot map */ - memset(prog->shader_slot_map, -1, sizeof(prog->shader_slot_map)); - } else { - /* at least some shaders are being reused: use existing slot map so locations match up */ - memcpy(prog->shader_slot_map, ctx->curr_program->shader_slot_map, sizeof(prog->shader_slot_map)); - prog->shader_slots_reserved = ctx->curr_program->shader_slots_reserved; - } -} - void zink_update_gfx_program(struct zink_context *ctx, struct zink_gfx_program *prog) { @@ -489,6 +412,32 @@ zink_pipeline_layout_create(struct zink_screen *screen, struct zink_program *pg) return layout; } +/* Copies stages[] into shaders[] indexed by tgsi_processor_to_shader_stage(i), then pairs each stage with the next non-NULL stage after it and runs zink_compiler_assign_io on the pair. Each stage's nir is cloned into prog->nir[] (with prog as the first nir_shader_clone argument) the first time it is needed, so zink_compiler_assign_io receives the clones rather than the shaders' own nir. NOTE(review): shaders[i]->nir is read without a NULL check - presumably the first stage is always present; confirm against callers. */ static void +assign_io(struct zink_gfx_program *prog, struct zink_shader *stages[ZINK_SHADER_COUNT]) +{ + struct zink_shader *shaders[PIPE_SHADER_TYPES]; + + /* build array in pipeline order */ + for (unsigned i = 0; i < ZINK_SHADER_COUNT; i++) + shaders[tgsi_processor_to_shader_stage(i)] = stages[i]; + + for (unsigned i = 0; i < MESA_SHADER_FRAGMENT;) { + nir_shader *producer = shaders[i]->nir; + for (unsigned j = i + 1; j < ZINK_SHADER_COUNT; i++, j++) { /* NOTE(review): i is stepped along with j, presumably so the outer loop still ends when no later stage exists - confirm */ + struct zink_shader *consumer = shaders[j]; + if (!consumer) + continue; + if (!prog->nir[producer->info.stage]) + prog->nir[producer->info.stage] = nir_shader_clone(prog, producer); + if (!prog->nir[j]) + prog->nir[j] = nir_shader_clone(prog, consumer->nir); + zink_compiler_assign_io(prog->nir[producer->info.stage], prog->nir[j]); + i = j; + break; + } + } +} + struct zink_gfx_program * zink_create_gfx_program(struct zink_context *ctx, struct zink_shader *stages[ZINK_SHADER_COUNT]) @@ -513,7 +462,7 @@ zink_create_gfx_program(struct zink_context *ctx, ctx->dirty_shader_stages |= BITFIELD_BIT(PIPE_SHADER_TESS_CTRL); } - init_slot_map(ctx, prog); + assign_io(prog, prog->shaders); update_shader_modules(ctx, prog->shaders, prog, false); @@ -604,7 
+553,7 @@ zink_create_compute_program(struct zink_context *ctx, struct zink_shader *shader comp->module = CALLOC_STRUCT(zink_shader_module); assert(comp->module); pipe_reference_init(&comp->module->reference, 1); - comp->module->shader = zink_shader_compile(screen, shader, NULL, NULL, NULL); + comp->module->shader = zink_shader_compile(screen, shader, shader->nir, NULL); assert(comp->module->shader); _mesa_hash_table_insert(&comp->base.shader_cache[0], &shader->shader_id, comp->module); diff --git a/src/gallium/drivers/zink/zink_program.h b/src/gallium/drivers/zink/zink_program.h index 89aabdb65bf..087a0fa0124 100644 --- a/src/gallium/drivers/zink/zink_program.h +++ b/src/gallium/drivers/zink/zink_program.h @@ -90,13 +90,12 @@ struct zink_program { struct zink_gfx_program { struct zink_program base; + struct nir_shader *nir[ZINK_SHADER_COUNT]; struct zink_shader_module *modules[ZINK_SHADER_COUNT]; // compute stage doesn't belong here struct zink_shader_module *default_variants[ZINK_SHADER_COUNT][2]; //[default, no streamout] const void *default_variant_key[ZINK_SHADER_COUNT]; struct zink_shader *shaders[ZINK_SHADER_COUNT]; - unsigned char shader_slot_map[VARYING_SLOT_MAX]; - unsigned char shader_slots_reserved; struct hash_table *pipelines[11]; // number of draw modes we support };