diff --git a/src/microsoft/clc/clc_compiler.c b/src/microsoft/clc/clc_compiler.c index 89498afa32f..f492943ff27 100644 --- a/src/microsoft/clc/clc_compiler.c +++ b/src/microsoft/clc/clc_compiler.c @@ -731,6 +731,7 @@ clc_spirv_to_dxil(struct clc_libclc *lib, NIR_PASS(progress, nir, nir_opt_remove_phis); NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true); NIR_PASS(progress, nir, nir_lower_vec3_to_vec4, nir_var_mem_generic | nir_var_uniform); + NIR_PASS(progress, nir, nir_opt_memcpy); } while (progress); } @@ -746,10 +747,6 @@ clc_spirv_to_dxil(struct clc_libclc *lib, assert(nir->scratch_size == 0); NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, glsl_get_cl_type_size_align); - // Lower memcpy - NIR_PASS_V(nir, nir_opt_memcpy); - NIR_PASS_V(nir, nir_lower_memcpy); - nir_lower_printf_options printf_options = { .treat_doubles_as_floats = true, .max_buffer_size = 1024 * 1024 @@ -766,15 +763,6 @@ clc_spirv_to_dxil(struct clc_libclc *lib, memcpy(metadata->printf.infos[i].arg_sizes, nir->printf_info[i].arg_sizes, nir->printf_info[i].num_args * sizeof(unsigned)); } - // copy propagate to prepare for lower_explicit_io - NIR_PASS_V(nir, nir_split_var_copies); - NIR_PASS_V(nir, nir_opt_copy_prop_vars); - NIR_PASS_V(nir, nir_lower_var_copies); - NIR_PASS_V(nir, nir_lower_vars_to_ssa); - NIR_PASS_V(nir, nir_lower_alu); - NIR_PASS_V(nir, nir_opt_dce); - NIR_PASS_V(nir, nir_opt_deref); - // For uniforms (kernel inputs, minus images), run this before adjusting variable list via image/sampler lowering NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_uniform, glsl_get_cl_type_size_align); @@ -878,6 +866,25 @@ clc_spirv_to_dxil(struct clc_libclc *lib, nir_var_mem_shared | nir_var_function_temp | nir_var_mem_global | nir_var_mem_constant, glsl_get_cl_type_size_align); + // Lower memcpy - needs to wait until types are sized + { + bool progress; + do { + progress = false; + NIR_PASS(progress, nir, nir_opt_memcpy); + NIR_PASS(progress, nir, nir_copy_prop); + NIR_PASS(progress, nir, nir_opt_copy_prop_vars); + NIR_PASS(progress, nir, nir_opt_deref); + NIR_PASS(progress, nir, nir_opt_dce); + NIR_PASS(progress, nir, nir_split_var_copies); + NIR_PASS(progress, nir, nir_lower_var_copies); + NIR_PASS(progress, nir, nir_lower_vars_to_ssa); + NIR_PASS(progress, nir, nir_opt_constant_folding); + NIR_PASS(progress, nir, nir_opt_cse); + } while (progress); + } + NIR_PASS_V(nir, nir_lower_memcpy); + NIR_PASS_V(nir, dxil_nir_lower_ubo_to_temp); NIR_PASS_V(nir, clc_lower_constant_to_ssbo, out_dxil->kernel, &uav_id); NIR_PASS_V(nir, clc_lower_global_to_ssbo);