microsoft/clc: Try harder to optimize memcpys before lowering them
For the case of memset, the SPIR-V translator produces a copy from a byte array of 0s. If we wait to lower memcpys until after types are sized, we can potentially turn those 0s into SSA zeros and remove the entire constant array.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23173>
This commit is contained in:
@@ -731,6 +731,7 @@ clc_spirv_to_dxil(struct clc_libclc *lib,
|
||||
NIR_PASS(progress, nir, nir_opt_remove_phis);
|
||||
NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
|
||||
NIR_PASS(progress, nir, nir_lower_vec3_to_vec4, nir_var_mem_generic | nir_var_uniform);
|
||||
NIR_PASS(progress, nir, nir_opt_memcpy);
|
||||
} while (progress);
|
||||
}
|
||||
|
||||
@@ -746,10 +747,6 @@ clc_spirv_to_dxil(struct clc_libclc *lib,
|
||||
assert(nir->scratch_size == 0);
|
||||
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, glsl_get_cl_type_size_align);
|
||||
|
||||
// Lower memcpy
|
||||
NIR_PASS_V(nir, nir_opt_memcpy);
|
||||
NIR_PASS_V(nir, nir_lower_memcpy);
|
||||
|
||||
nir_lower_printf_options printf_options = {
|
||||
.treat_doubles_as_floats = true,
|
||||
.max_buffer_size = 1024 * 1024
|
||||
@@ -766,15 +763,6 @@ clc_spirv_to_dxil(struct clc_libclc *lib,
|
||||
memcpy(metadata->printf.infos[i].arg_sizes, nir->printf_info[i].arg_sizes, nir->printf_info[i].num_args * sizeof(unsigned));
|
||||
}
|
||||
|
||||
// copy propagate to prepare for lower_explicit_io
|
||||
NIR_PASS_V(nir, nir_split_var_copies);
|
||||
NIR_PASS_V(nir, nir_opt_copy_prop_vars);
|
||||
NIR_PASS_V(nir, nir_lower_var_copies);
|
||||
NIR_PASS_V(nir, nir_lower_vars_to_ssa);
|
||||
NIR_PASS_V(nir, nir_lower_alu);
|
||||
NIR_PASS_V(nir, nir_opt_dce);
|
||||
NIR_PASS_V(nir, nir_opt_deref);
|
||||
|
||||
// For uniforms (kernel inputs, minus images), run this before adjusting variable list via image/sampler lowering
|
||||
NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_uniform, glsl_get_cl_type_size_align);
|
||||
|
||||
@@ -878,6 +866,25 @@ clc_spirv_to_dxil(struct clc_libclc *lib,
|
||||
nir_var_mem_shared | nir_var_function_temp | nir_var_mem_global | nir_var_mem_constant,
|
||||
glsl_get_cl_type_size_align);
|
||||
|
||||
// Lower memcpy - needs to wait until types are sized
|
||||
{
|
||||
bool progress;
|
||||
do {
|
||||
progress = false;
|
||||
NIR_PASS(progress, nir, nir_opt_memcpy);
|
||||
NIR_PASS(progress, nir, nir_copy_prop);
|
||||
NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
|
||||
NIR_PASS(progress, nir, nir_opt_deref);
|
||||
NIR_PASS(progress, nir, nir_opt_dce);
|
||||
NIR_PASS(progress, nir, nir_split_var_copies);
|
||||
NIR_PASS(progress, nir, nir_lower_var_copies);
|
||||
NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
|
||||
NIR_PASS(progress, nir, nir_opt_constant_folding);
|
||||
NIR_PASS(progress, nir, nir_opt_cse);
|
||||
} while (progress);
|
||||
}
|
||||
NIR_PASS_V(nir, nir_lower_memcpy);
|
||||
|
||||
NIR_PASS_V(nir, dxil_nir_lower_ubo_to_temp);
|
||||
NIR_PASS_V(nir, clc_lower_constant_to_ssbo, out_dxil->kernel, &uav_id);
|
||||
NIR_PASS_V(nir, clc_lower_global_to_ssbo);
|
||||
|
||||
Reference in New Issue
Block a user