diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index a8990f85dba..5262e254835 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -4877,6 +4877,23 @@ bool nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes, nir_address_format); +typedef bool (*nir_should_vectorize_mem_func)(unsigned align_mul, + unsigned align_offset, + unsigned bit_size, + unsigned num_components, + nir_intrinsic_instr *low, nir_intrinsic_instr *high, + void *data); + +typedef struct { + nir_should_vectorize_mem_func callback; + nir_variable_mode modes; + nir_variable_mode robust_modes; + void *cb_data; + bool has_shared2_amd; +} nir_load_store_vectorize_options; + +bool nir_opt_load_store_vectorize(nir_shader *shader, const nir_load_store_vectorize_options *options); + typedef struct nir_lower_shader_calls_options { /* Address format used for load/store operations on the call stack. */ nir_address_format address_format; @@ -4888,6 +4905,15 @@ typedef struct nir_lower_shader_calls_options { * You might want to disable combined_loads for best effects. */ bool localized_loads; + + /* If this function pointer is not NULL, lower_shader_calls will run + * nir_opt_load_store_vectorize for stack load/store operations. Otherwise + * the optimization is not run. 
+ */ + nir_should_vectorize_mem_func vectorizer_callback; + + /* Opaque pointer passed back to vectorizer_callback as its data argument */ + void *vectorizer_data; } nir_lower_shader_calls_options; bool @@ -5679,23 +5705,6 @@ bool nir_opt_ray_queries(nir_shader *shader); bool nir_opt_ray_query_ranges(nir_shader *shader); -typedef bool (*nir_should_vectorize_mem_func)(unsigned align_mul, - unsigned align_offset, - unsigned bit_size, - unsigned num_components, - nir_intrinsic_instr *low, nir_intrinsic_instr *high, - void *data); - -typedef struct { - nir_should_vectorize_mem_func callback; - nir_variable_mode modes; - nir_variable_mode robust_modes; - void *cb_data; - bool has_shared2_amd; -} nir_load_store_vectorize_options; - -bool nir_opt_load_store_vectorize(nir_shader *shader, const nir_load_store_vectorize_options *options); - void nir_sweep(nir_shader *shader); void nir_remap_dual_slot_attributes(nir_shader *shader, diff --git a/src/compiler/nir/nir_lower_shader_calls.c b/src/compiler/nir/nir_lower_shader_calls.c index 01e145eee55..eba34ec9740 100644 --- a/src/compiler/nir/nir_lower_shader_calls.c +++ b/src/compiler/nir/nir_lower_shader_calls.c @@ -1851,6 +1851,33 @@ nir_split_stack_components(nir_shader *shader) NULL); } +struct stack_op_vectorizer_state { /* driver callback and its data, forwarded to by should_vectorize() */ + nir_should_vectorize_mem_func driver_callback; + void *driver_data; +}; + +static bool +should_vectorize(unsigned align_mul, + unsigned align_offset, + unsigned bit_size, + unsigned num_components, + nir_intrinsic_instr *low, nir_intrinsic_instr *high, + void *data) +{ + /* Refuse unless both accesses are load_stack/store_stack; the driver callback decides the rest */ + if ((low->intrinsic != nir_intrinsic_load_stack && + low->intrinsic != nir_intrinsic_store_stack) || + (high->intrinsic != nir_intrinsic_load_stack && + high->intrinsic != nir_intrinsic_store_stack)) + return false; + + struct stack_op_vectorizer_state *state = data; + + return state->driver_callback(align_mul, align_offset, + bit_size, num_components, + low, high, state->driver_data); +} + /** Lower shader call instructions to 
split shaders. * * Shader calls can be split into an initial shader and a series of "resume" @@ -1959,9 +1986,27 @@ nir_lower_shader_calls(nir_shader *shader, NIR_PASS_V(resume_shaders[i], nir_opt_stack_loads); } + struct stack_op_vectorizer_state vectorizer_state = { + .driver_callback = options->vectorizer_callback, + .driver_data = options->vectorizer_data, + }; + nir_load_store_vectorize_options vect_opts = { + .modes = nir_var_shader_temp, + .callback = should_vectorize, + .cb_data = &vectorizer_state, + }; + + if (options->vectorizer_callback != NULL) { + NIR_PASS_V(shader, nir_split_stack_components); + NIR_PASS_V(shader, nir_opt_load_store_vectorize, &vect_opts); + } NIR_PASS_V(shader, nir_lower_stack_to_scratch, options->address_format); nir_opt_cse(shader); for (unsigned i = 0; i < num_calls; i++) { + if (options->vectorizer_callback != NULL) { /* vectorize this resume shader's stack ops before they are lowered to scratch */ + NIR_PASS_V(resume_shaders[i], nir_split_stack_components); + NIR_PASS_V(resume_shaders[i], nir_opt_load_store_vectorize, &vect_opts); + } NIR_PASS_V(resume_shaders[i], nir_lower_stack_to_scratch, options->address_format); nir_opt_cse(resume_shaders[i]); diff --git a/src/compiler/nir/nir_opt_load_store_vectorize.c b/src/compiler/nir/nir_opt_load_store_vectorize.c index 067b2dea78c..438fda234a1 100644 --- a/src/compiler/nir/nir_opt_load_store_vectorize.c +++ b/src/compiler/nir/nir_opt_load_store_vectorize.c @@ -154,6 +154,8 @@ case nir_intrinsic_##op: {\ ATOMIC(nir_var_mem_task_payload, task_payload, fmin, -1, 0, -1, 1) ATOMIC(nir_var_mem_task_payload, task_payload, fmax, -1, 0, -1, 1) ATOMIC(nir_var_mem_task_payload, task_payload, fcomp_swap, -1, 0, -1, 1) + LOAD(nir_var_shader_temp, stack, -1, -1, -1) + STORE(nir_var_shader_temp, stack, -1, -1, -1, 0) default: break; #undef ATOMIC