microsoft/clc: Try harder to optimize memcpys before lowering them

For the case of memset, the SPIR-V translator produces a memcpy from a
constant byte array of 0s. If we wait to lower memcpys until after types
are sized, and keep running nir_opt_memcpy together with copy propagation
and constant folding until then, we can potentially turn those 0s into
SSA zeros and remove the entire constant array.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23173>
This commit is contained in:
Jesse Natalie
2023-05-18 10:49:04 -07:00
committed by Marge Bot
parent fba82797d7
commit e9b2bb244b
+20 -13
View File
@@ -731,6 +731,7 @@ clc_spirv_to_dxil(struct clc_libclc *lib,
NIR_PASS(progress, nir, nir_opt_remove_phis);
NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
NIR_PASS(progress, nir, nir_lower_vec3_to_vec4, nir_var_mem_generic | nir_var_uniform);
NIR_PASS(progress, nir, nir_opt_memcpy);
} while (progress);
}
@@ -746,10 +747,6 @@ clc_spirv_to_dxil(struct clc_libclc *lib,
assert(nir->scratch_size == 0);
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, glsl_get_cl_type_size_align);
// Lower memcpy
NIR_PASS_V(nir, nir_opt_memcpy);
NIR_PASS_V(nir, nir_lower_memcpy);
nir_lower_printf_options printf_options = {
.treat_doubles_as_floats = true,
.max_buffer_size = 1024 * 1024
@@ -766,15 +763,6 @@ clc_spirv_to_dxil(struct clc_libclc *lib,
memcpy(metadata->printf.infos[i].arg_sizes, nir->printf_info[i].arg_sizes, nir->printf_info[i].num_args * sizeof(unsigned));
}
// copy propagate to prepare for lower_explicit_io
NIR_PASS_V(nir, nir_split_var_copies);
NIR_PASS_V(nir, nir_opt_copy_prop_vars);
NIR_PASS_V(nir, nir_lower_var_copies);
NIR_PASS_V(nir, nir_lower_vars_to_ssa);
NIR_PASS_V(nir, nir_lower_alu);
NIR_PASS_V(nir, nir_opt_dce);
NIR_PASS_V(nir, nir_opt_deref);
// For uniforms (kernel inputs, minus images), run this before adjusting variable list via image/sampler lowering
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_uniform, glsl_get_cl_type_size_align);
@@ -878,6 +866,25 @@ clc_spirv_to_dxil(struct clc_libclc *lib,
nir_var_mem_shared | nir_var_function_temp | nir_var_mem_global | nir_var_mem_constant,
glsl_get_cl_type_size_align);
// Lower memcpy - needs to wait until types are sized
{
bool progress;
do {
progress = false;
NIR_PASS(progress, nir, nir_opt_memcpy);
NIR_PASS(progress, nir, nir_copy_prop);
NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
NIR_PASS(progress, nir, nir_opt_deref);
NIR_PASS(progress, nir, nir_opt_dce);
NIR_PASS(progress, nir, nir_split_var_copies);
NIR_PASS(progress, nir, nir_lower_var_copies);
NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
NIR_PASS(progress, nir, nir_opt_constant_folding);
NIR_PASS(progress, nir, nir_opt_cse);
} while (progress);
}
NIR_PASS_V(nir, nir_lower_memcpy);
NIR_PASS_V(nir, dxil_nir_lower_ubo_to_temp);
NIR_PASS_V(nir, clc_lower_constant_to_ssbo, out_dxil->kernel, &uav_id);
NIR_PASS_V(nir, clc_lower_global_to_ssbo);