diff --git a/src/amd/common/nir/ac_nir.h b/src/amd/common/nir/ac_nir.h index 537eb2491eb..f01ee03c62b 100644 --- a/src/amd/common/nir/ac_nir.h +++ b/src/amd/common/nir/ac_nir.h @@ -269,20 +269,6 @@ typedef struct ac_nir_gs_output_info { nir_alu_type (*types_16bit_hi)[4]; } ac_nir_gs_output_info; -nir_shader * -ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, - enum amd_gfx_level gfx_level, - uint32_t export_clipdist_mask, - bool write_pos_to_clipvertex, - bool pack_clip_cull_distances, - const uint8_t *param_offsets, - bool has_param_exports, - bool disable_streamout, - bool kill_pointsize, - bool kill_layer, - bool force_vrs, - ac_nir_gs_output_info *output_info); - bool ac_nir_lower_legacy_vs(nir_shader *nir, enum amd_gfx_level gfx_level, @@ -297,11 +283,29 @@ ac_nir_lower_legacy_vs(nir_shader *nir, bool kill_layer, bool force_vrs); +typedef struct { + bool has_gen_prim_query; + bool has_pipeline_stats_query; + ac_nir_gs_output_info *output_info; + + enum amd_gfx_level gfx_level; + uint32_t export_clipdist_mask; + bool write_pos_to_clipvertex; + bool pack_clip_cull_distances; + const uint8_t *param_offsets; + bool has_param_exports; + bool disable_streamout; + bool kill_pointsize; + bool kill_layer; + bool force_vrs; +} ac_nir_lower_legacy_gs_options; + +nir_shader * +ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, ac_nir_lower_legacy_gs_options *options); + bool -ac_nir_lower_legacy_gs(nir_shader *nir, - bool has_gen_prim_query, - bool has_pipeline_stats_query, - ac_nir_gs_output_info *output_info); +ac_nir_lower_legacy_gs(nir_shader *nir, ac_nir_lower_legacy_gs_options *options, + nir_shader **gs_copy_shader); /* This is a pre-link pass. It should only eliminate code and do lowering that mostly doesn't * generate AMD-specific intrinsics. diff --git a/src/amd/common/nir/ac_nir_create_gs_copy_shader.c b/src/amd/common/nir/ac_nir_create_gs_copy_shader.c index 5abb57b70c2..35c10e34f23 100644 --- a/src/amd/common/nir/ac_nir_create_gs_copy_shader.c +++ b/src/amd/common/nir/ac_nir_create_gs_copy_shader.c @@ -11,21 +11,11 @@ #include "nir_xfb_info.h" nir_shader * -ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, - enum amd_gfx_level gfx_level, - uint32_t export_clipdist_mask, - bool write_pos_to_clipvertex, - bool pack_clip_cull_distances, - const uint8_t *param_offsets, - bool has_param_exports, - bool disable_streamout, - bool kill_pointsize, - bool kill_layer, - bool force_vrs, - ac_nir_gs_output_info *output_info) +ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, ac_nir_lower_legacy_gs_options *options) { nir_builder b = nir_builder_init_simple_shader( MESA_SHADER_VERTEX, gs_nir->options, "gs_copy"); + ac_nir_gs_output_info *output_info = options->output_info; b.shader->info.outputs_written = gs_nir->info.outputs_written; b.shader->info.outputs_written_16bit = gs_nir->info.outputs_written_16bit; @@ -34,7 +24,7 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, nir_xfb_info *info = ac_nir_get_sorted_xfb_info(gs_nir); nir_def *stream_id = NULL; - if (!disable_streamout && info) + if (!options->disable_streamout && info) stream_id = nir_ubfe_imm(&b, nir_load_streamout_config_amd(&b), 24, 2); nir_def *vtx_offset = nir_imul_imm(&b, nir_load_vertex_id_zero_base(&b), 4); @@ -112,17 +102,18 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, if (stream == 0) { uint64_t export_outputs = b.shader->info.outputs_written | VARYING_BIT_POS; - if (kill_pointsize) + if (options->kill_pointsize) export_outputs &= ~VARYING_BIT_PSIZ; - if (kill_layer) + if (options->kill_layer) export_outputs &= ~VARYING_BIT_LAYER; - ac_nir_export_position(&b, gfx_level, export_clipdist_mask, false, write_pos_to_clipvertex, - pack_clip_cull_distances, !has_param_exports, force_vrs, export_outputs, + ac_nir_export_position(&b, options->gfx_level, options->export_clipdist_mask, false, + options->write_pos_to_clipvertex, options->pack_clip_cull_distances, + !options->has_param_exports, options->force_vrs, export_outputs, &out, NULL); - if (has_param_exports) { - ac_nir_export_parameters(&b, param_offsets, + if (options->has_param_exports) { + ac_nir_export_parameters(&b, options->param_offsets, b.shader->info.outputs_written, b.shader->info.outputs_written_16bit, &out); diff --git a/src/amd/common/nir/ac_nir_lower_legacy_gs.c b/src/amd/common/nir/ac_nir_lower_legacy_gs.c index 3ceedbc9f75..4c8b911e55a 100644 --- a/src/amd/common/nir/ac_nir_lower_legacy_gs.c +++ b/src/amd/common/nir/ac_nir_lower_legacy_gs.c @@ -230,13 +230,11 @@ lower_legacy_gs_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, void *sta } bool -ac_nir_lower_legacy_gs(nir_shader *nir, - bool has_gen_prim_query, - bool has_pipeline_stats_query, - ac_nir_gs_output_info *output_info) +ac_nir_lower_legacy_gs(nir_shader *nir, ac_nir_lower_legacy_gs_options *options, + nir_shader **gs_copy_shader) { lower_legacy_gs_state s = { - .info = output_info, + .info = options->output_info, }; unsigned num_vertices_per_primitive = 0; @@ -265,9 +263,9 @@ ac_nir_lower_legacy_gs(nir_shader *nir, /* Emit shader query for mix use legacy/NGG GS */ bool progress = ac_nir_gs_shader_query(b, - has_gen_prim_query, - has_pipeline_stats_query, - has_pipeline_stats_query, + options->has_gen_prim_query, + options->has_pipeline_stats_query, + options->has_pipeline_stats_query, num_vertices_per_primitive, 64, s.vertex_count, @@ -286,5 +284,6 @@ ac_nir_lower_legacy_gs(nir_shader *nir, nir_progress(progress, impl, nir_metadata_none); + *gs_copy_shader = ac_nir_create_gs_copy_shader(nir, options); return true; } diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 82bdc16f5b3..cdc61488a22 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -454,7 +454,17 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat .sysval_mask = stage->info.gs.output_usage_mask, .varying_mask = stage->info.gs.output_usage_mask, }; - NIR_PASS(_, stage->nir, ac_nir_lower_legacy_gs, false, false, &gs_out_info); + ac_nir_lower_legacy_gs_options options = { + .has_gen_prim_query = false, + .has_pipeline_stats_query = false, + .output_info = &gs_out_info, + .gfx_level = pdev->info.gfx_level, + .export_clipdist_mask = stage->info.outinfo.clip_dist_mask | stage->info.outinfo.cull_dist_mask, + .param_offsets = stage->info.outinfo.vs_output_param_offset, + .has_param_exports = stage->info.outinfo.param_exports, + .force_vrs = stage->info.force_vrs_per_vertex, + }; + NIR_PASS(_, stage->nir, ac_nir_lower_legacy_gs, &options, &stage->gs_copy_shader); } } else if (stage->stage == MESA_SHADER_FRAGMENT) { ac_nir_lower_ps_late_options late_options = { diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index ab2faac6737..8e7db05cef8 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -2265,16 +2265,7 @@ radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache struct radv_instance *instance = radv_physical_device_instance(pdev); const struct radv_shader_info *gs_info = &gs_stage->info; - ac_nir_gs_output_info output_info = { - .streams = gs_info->gs.output_streams, - .sysval_mask = gs_info->gs.output_usage_mask, - .varying_mask = gs_info->gs.output_usage_mask, - }; - nir_shader *nir = ac_nir_create_gs_copy_shader( - gs_stage->nir, pdev->info.gfx_level, gs_info->outinfo.clip_dist_mask | gs_info->outinfo.cull_dist_mask, false, - false, gs_info->outinfo.vs_output_param_offset, gs_info->outinfo.param_exports, false, false, false, - gs_info->force_vrs_per_vertex, &output_info); - + nir_shader *nir = gs_stage->gs_copy_shader; nir->info.internal = true; nir_validate_shader(nir, "after ac_nir_create_gs_copy_shader"); diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 2c3ee50246f..cb4641a6496 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -272,6 +272,7 @@ struct radv_shader_stage { unsigned char shader_sha1[20]; nir_shader *nir; + nir_shader *gs_copy_shader; nir_shader *internal_nir; /* meta shaders */ struct radv_shader_info info; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 2d5ea98d29c..e9b06ea2a92 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1606,9 +1606,44 @@ static void run_late_optimization_and_lowering_passes(struct si_nir_shader_ctx * } progress = true; } else if (nir->info.stage == MESA_SHADER_GEOMETRY && !key->ge.as_ngg) { + STATIC_ASSERT(sizeof(ctx->temp_info.vs_output_param_offset[0]) == 1); + memset(ctx->temp_info.vs_output_param_offset, AC_EXP_PARAM_DEFAULT_VAL_0000, + sizeof(ctx->temp_info.vs_output_param_offset)); + + for (unsigned i = 0; i < sel->info.num_outputs; i++) { + unsigned semantic = sel->info.output_semantic[i]; + + /* Skip if no channel writes to stream 0. */ + if (!nir_slot_is_varying(semantic, MESA_SHADER_FRAGMENT) || + (sel->info.output_streams[i] & 0x03 && /* whether component 0 writes to non-zero stream */ + sel->info.output_streams[i] & 0x0c && /* whether component 1 writes to non-zero stream */ + sel->info.output_streams[i] & 0x30 && /* whether component 2 writes to non-zero stream */ + sel->info.output_streams[i] & 0xc0)) /* whether component 3 writes to non-zero stream */ + continue; + + ctx->temp_info.vs_output_param_offset[semantic] = shader->info.nr_param_exports++; + } + si_init_gs_output_info(&sel->info, &ctx->temp_info); - NIR_PASS_V(nir, ac_nir_lower_legacy_gs, false, sel->screen->use_ngg, - &ctx->temp_info.gs_out_info); + + unsigned clip_cull_mask = + (sel->info.clipdist_mask & ~shader->key.ge.opt.kill_clip_distances) | sel->info.culldist_mask; + + ac_nir_lower_legacy_gs_options options = { + .has_gen_prim_query = false, + .has_pipeline_stats_query = sel->screen->use_ngg, + .output_info = &ctx->temp_info.gs_out_info, + .gfx_level = sel->screen->info.gfx_level, + .export_clipdist_mask = clip_cull_mask, + .param_offsets = ctx->temp_info.vs_output_param_offset, + .has_param_exports = shader->info.nr_param_exports, + .disable_streamout = !shader->info.num_streamout_vec4s, + .kill_pointsize = key->ge.opt.kill_pointsize, + .kill_layer = key->ge.opt.kill_layer, + .force_vrs = sel->screen->options.vrs2x2, + }; + + NIR_PASS(_, nir, ac_nir_lower_legacy_gs, &options, &ctx->gs_copy_shader); progress = true; } else if (nir->info.stage == MESA_SHADER_FRAGMENT && shader->is_monolithic) { ac_nir_lower_ps_late_options late_options = { @@ -1865,14 +1900,11 @@ static struct si_shader * si_nir_generate_gs_copy_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler, struct si_shader *gs_shader, - struct si_temp_shader_variant_info *temp_info, - nir_shader *gs_nir, + nir_shader *gs_nir, nir_shader *gs_copy_shader, struct util_debug_callback *debug) { struct si_shader *shader; struct si_shader_selector *gs_selector = gs_shader->selector; - struct si_shader_info *gsinfo = &gs_selector->info; - union si_shader_key *gskey = &gs_shader->key; shader = CALLOC_STRUCT(si_shader); if (!shader) @@ -1886,43 +1918,10 @@ si_nir_generate_gs_copy_shader(struct si_screen *sscreen, shader->is_gs_copy_shader = true; shader->wave_size = si_determine_wave_size(sscreen, shader); shader->info.num_streamout_vec4s = gs_shader->info.num_streamout_vec4s; + shader->info.nr_pos_exports = si_get_nr_pos_exports(gs_selector, &gs_shader->key); + shader->info.nr_param_exports = gs_shader->info.nr_param_exports; - STATIC_ASSERT(sizeof(temp_info->vs_output_param_offset[0]) == 1); - memset(temp_info->vs_output_param_offset, AC_EXP_PARAM_DEFAULT_VAL_0000, - sizeof(temp_info->vs_output_param_offset)); - - for (unsigned i = 0; i < gsinfo->num_outputs; i++) { - unsigned semantic = gsinfo->output_semantic[i]; - - /* Skip if no channel writes to stream 0. */ - if (!nir_slot_is_varying(semantic, MESA_SHADER_FRAGMENT) || - (gsinfo->output_streams[i] & 0x03 && /* whether component 0 writes to non-zero stream */ - gsinfo->output_streams[i] & 0x0c && /* whether component 1 writes to non-zero stream */ - gsinfo->output_streams[i] & 0x30 && /* whether component 2 writes to non-zero stream */ - gsinfo->output_streams[i] & 0xc0)) /* whether component 3 writes to non-zero stream */ - continue; - - temp_info->vs_output_param_offset[semantic] = shader->info.nr_param_exports++; - } - - shader->info.nr_pos_exports = si_get_nr_pos_exports(gs_selector, gskey); - - unsigned clip_cull_mask = - (gsinfo->clipdist_mask & ~gskey->ge.opt.kill_clip_distances) | gsinfo->culldist_mask; - - nir_shader *nir = - ac_nir_create_gs_copy_shader(gs_nir, - sscreen->info.gfx_level, - clip_cull_mask, - false, false, - temp_info->vs_output_param_offset, - shader->info.nr_param_exports, - !gs_shader->info.num_streamout_vec4s, - gskey->ge.opt.kill_pointsize, - gskey->ge.opt.kill_layer, - sscreen->options.vrs2x2, - &temp_info->gs_out_info); - + nir_shader *nir = gs_copy_shader; struct si_linked_shaders linked; memset(&linked, 0, sizeof(linked)); linked.consumer.nir = nir; @@ -2049,8 +2048,8 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi /* The GS copy shader is compiled next. */ if (nir->info.stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) { shader->gs_copy_shader = - si_nir_generate_gs_copy_shader(sscreen, compiler, shader, &linked.consumer.temp_info, - nir, debug); + si_nir_generate_gs_copy_shader(sscreen, compiler, shader, nir, + linked.consumer.gs_copy_shader, debug); if (!shader->gs_copy_shader) { fprintf(stderr, "radeonsi: can't create GS copy shader\n"); ret = false; diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 7da9019999f..35a6b107a7a 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -64,6 +64,7 @@ struct si_nir_shader_ctx { struct si_shader_args args; struct si_temp_shader_variant_info temp_info; nir_shader *nir; + nir_shader *gs_copy_shader; bool free_nir; };