From 07ef1a812449f8fab81257cfaeb2452aa3a902cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 25 Jun 2024 06:32:34 -0400 Subject: [PATCH] ac,radeonsi: set 16-bit flags in io_options optimally Reviewed-by: Georg Lehmann Part-of: --- src/amd/common/ac_shader_util.c | 1 + src/gallium/drivers/radeonsi/si_get.c | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/amd/common/ac_shader_util.c b/src/amd/common/ac_shader_util.c index 248cc92f9f1..9a14391b8e9 100644 --- a/src/amd/common/ac_shader_util.c +++ b/src/amd/common/ac_shader_util.c @@ -97,6 +97,7 @@ void ac_set_nir_options(struct radeon_info *info, bool use_llvm, options->vectorize_vec2_16bit = info->has_packed_math_16bit; options->discard_is_demote = true; options->io_options = nir_io_has_flexible_input_interpolation_except_flat | + (info->gfx_level >= GFX8 ? nir_io_16bit_input_output_support : 0) | nir_io_prefer_scalar_fs_inputs | nir_io_mix_convergent_flat_with_interpolated; } diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index 39257f9c0e9..fa7613319b4 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -1629,6 +1629,7 @@ void si_init_screen_get_functions(struct si_screen *sscreen) (sscreen->info.family >= CHIP_GFX940 && !sscreen->info.has_graphics) || /* fma32 is too slow for gpu < gfx9, so apply the option only for gpu >= gfx9 */ (sscreen->info.gfx_level >= GFX9 && sscreen->options.force_use_fma32); + bool has_mediump = sscreen->info.gfx_level >= GFX8 && sscreen->options.fp16; nir_shader_compiler_options *options = sscreen->nir_options; ac_set_nir_options(&sscreen->info, !sscreen->use_aco, options); @@ -1655,10 +1656,10 @@ void si_init_screen_get_functions(struct si_screen *sscreen) * when execution mode is rtz instead of rtne. */ options->force_f2f16_rtz = true; - options->io_options |= nir_io_glsl_lower_derefs | + options->io_options |= (!has_mediump ? nir_io_mediump_is_32bit : 0) | + nir_io_glsl_lower_derefs | (sscreen->options.optimize_io ? nir_io_glsl_opt_varyings : 0); - options->lower_mediump_io = sscreen->info.gfx_level >= GFX8 && sscreen->options.fp16 ? - si_lower_mediump_io : NULL; + options->lower_mediump_io = has_mediump ? si_lower_mediump_io : NULL; /* HW supports indirect indexing for: | Enabled in driver * ------------------------------------------------------- * TCS inputs | Yes