From e9b2bb244bab82c4638ead8cee57c9507d793239 Mon Sep 17 00:00:00 2001
From: Jesse Natalie
Date: Thu, 18 May 2023 10:49:04 -0700
Subject: [PATCH] microsoft/clc: Try harder to optimize memcpys before lowering them

For the case of memset, the SPIR-V translator produces a copy from a
byte array of 0s. If we wait to lower memcpys until after types are sized,
we can potentially turn those 0s into SSA zeros and remove the entire
constant array.

Part-of:
---
 src/microsoft/clc/clc_compiler.c | 33 +++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/src/microsoft/clc/clc_compiler.c b/src/microsoft/clc/clc_compiler.c
index 89498afa32f..f492943ff27 100644
--- a/src/microsoft/clc/clc_compiler.c
+++ b/src/microsoft/clc/clc_compiler.c
@@ -731,6 +731,7 @@ clc_spirv_to_dxil(struct clc_libclc *lib,
          NIR_PASS(progress, nir, nir_opt_remove_phis);
          NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
          NIR_PASS(progress, nir, nir_lower_vec3_to_vec4, nir_var_mem_generic | nir_var_uniform);
+         NIR_PASS(progress, nir, nir_opt_memcpy);
       } while (progress);
    }
 
@@ -746,10 +747,6 @@ clc_spirv_to_dxil(struct clc_libclc *lib,
    assert(nir->scratch_size == 0);
    NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, glsl_get_cl_type_size_align);
 
-   // Lower memcpy
-   NIR_PASS_V(nir, nir_opt_memcpy);
-   NIR_PASS_V(nir, nir_lower_memcpy);
-
    nir_lower_printf_options printf_options = {
       .treat_doubles_as_floats = true,
       .max_buffer_size = 1024 * 1024
@@ -766,15 +763,6 @@ clc_spirv_to_dxil(struct clc_libclc *lib,
       memcpy(metadata->printf.infos[i].arg_sizes, nir->printf_info[i].arg_sizes, nir->printf_info[i].num_args * sizeof(unsigned));
    }
 
-   // copy propagate to prepare for lower_explicit_io
-   NIR_PASS_V(nir, nir_split_var_copies);
-   NIR_PASS_V(nir, nir_opt_copy_prop_vars);
-   NIR_PASS_V(nir, nir_lower_var_copies);
-   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
-   NIR_PASS_V(nir, nir_lower_alu);
-   NIR_PASS_V(nir, nir_opt_dce);
-   NIR_PASS_V(nir, nir_opt_deref);
-
    // For uniforms (kernel inputs, minus images), run this before adjusting variable list via image/sampler lowering
    NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_uniform, glsl_get_cl_type_size_align);
 
@@ -878,6 +866,25 @@ clc_spirv_to_dxil(struct clc_libclc *lib,
               nir_var_mem_shared | nir_var_function_temp | nir_var_mem_global | nir_var_mem_constant,
               glsl_get_cl_type_size_align);
 
+   // Lower memcpy - needs to wait until types are sized
+   {
+      bool progress;
+      do {
+         progress = false;
+         NIR_PASS(progress, nir, nir_opt_memcpy);
+         NIR_PASS(progress, nir, nir_copy_prop);
+         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
+         NIR_PASS(progress, nir, nir_opt_deref);
+         NIR_PASS(progress, nir, nir_opt_dce);
+         NIR_PASS(progress, nir, nir_split_var_copies);
+         NIR_PASS(progress, nir, nir_lower_var_copies);
+         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
+         NIR_PASS(progress, nir, nir_opt_constant_folding);
+         NIR_PASS(progress, nir, nir_opt_cse);
+      } while (progress);
+   }
+   NIR_PASS_V(nir, nir_lower_memcpy);
+
    NIR_PASS_V(nir, dxil_nir_lower_ubo_to_temp);
    NIR_PASS_V(nir, clc_lower_constant_to_ssbo, out_dxil->kernel, &uav_id);
    NIR_PASS_V(nir, clc_lower_global_to_ssbo);