diff --git a/src/broadcom/compiler/v3d_nir_lower_load_store_bitsize.c b/src/broadcom/compiler/v3d_nir_lower_load_store_bitsize.c index 77f9054479d..df3fb33d2ba 100644 --- a/src/broadcom/compiler/v3d_nir_lower_load_store_bitsize.c +++ b/src/broadcom/compiler/v3d_nir_lower_load_store_bitsize.c @@ -133,6 +133,16 @@ v3d_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes, uint32_t align_offset, bool offset_is_const, const void *cb_data) { + /* we only support single component 32 bit load/stores on scratch */ + if (intrin == nir_intrinsic_load_scratch || + intrin == nir_intrinsic_store_scratch) { + return (nir_mem_access_size_align){ + .num_components = 1, + .bit_size = 32, + .align = 4, + }; + } + align = nir_combined_align(align, align_offset); assert(util_is_power_of_two_nonzero(align)); @@ -210,7 +220,7 @@ v3d_nir_lower_load_store_bitsize(nir_shader *s) nir_lower_mem_access_bit_sizes_options lower_options = { .modes = nir_var_mem_global | nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_constant | - nir_var_mem_shared, + nir_var_mem_shared | nir_var_function_temp, .callback = v3d_size_align_cb, }; diff --git a/src/broadcom/compiler/v3d_nir_lower_scratch.c b/src/broadcom/compiler/v3d_nir_lower_scratch.c index 93ed1bb6e26..c65a96dcb2e 100644 --- a/src/broadcom/compiler/v3d_nir_lower_scratch.c +++ b/src/broadcom/compiler/v3d_nir_lower_scratch.c @@ -30,18 +30,17 @@ * * Swizzles around the addresses of * nir_intrinsic_load_scratch/nir_intrinsic_store_scratch so that a QPU stores - * a cacheline at a time per dword of scratch access, scalarizing and removing - * writemasks in the process. + * a cacheline at a time per dword of scratch access. */ static nir_def * v3d_nir_scratch_offset(nir_builder *b, nir_intrinsic_instr *instr) { - bool is_store = instr->intrinsic == nir_intrinsic_store_scratch; - nir_def *offset = instr->src[is_store ? 1 : 0].ssa; + b->cursor = nir_before_instr(&instr->instr); + nir_def *offset = nir_get_io_offset_src(instr)->ssa; assert(nir_intrinsic_align_mul(instr) >= 4); - assert(nir_intrinsic_align_offset(instr) == 0); + assert(nir_intrinsic_align_offset(instr) % 4 == 0); /* The spill_offset register will already have the subgroup ID (EIDX) * shifted and ORed in at bit 2, so all we need to do is to move the @@ -51,67 +50,13 @@ v3d_nir_scratch_offset(nir_builder *b, nir_intrinsic_instr *instr) } static void -v3d_nir_lower_load_scratch(nir_builder *b, nir_intrinsic_instr *instr) +v3d_nir_lower_scratch_instr(nir_builder *b, nir_intrinsic_instr *instr) { - b->cursor = nir_before_instr(&instr->instr); - - nir_def *offset = v3d_nir_scratch_offset(b,instr); - - nir_def *chans[NIR_MAX_VEC_COMPONENTS]; - for (int i = 0; i < instr->num_components; i++) { - nir_def *chan_offset = - nir_iadd_imm(b, offset, V3D_CHANNELS * i * 4); - - nir_intrinsic_instr *chan_instr = - nir_intrinsic_instr_create(b->shader, instr->intrinsic); - chan_instr->num_components = 1; - nir_def_init(&chan_instr->instr, &chan_instr->def, 1, - instr->def.bit_size); - - chan_instr->src[0] = nir_src_for_ssa(chan_offset); - - nir_intrinsic_set_align(chan_instr, 4, 0); - - nir_builder_instr_insert(b, &chan_instr->instr); - - chans[i] = &chan_instr->def; - } - - nir_def *result = nir_vec(b, chans, instr->num_components); - nir_def_rewrite_uses(&instr->def, result); - nir_instr_remove(&instr->instr); -} - -static void -v3d_nir_lower_store_scratch(nir_builder *b, nir_intrinsic_instr *instr) -{ - b->cursor = nir_before_instr(&instr->instr); + /* scalarized through nir_lower_mem_access_bit_sizes */ + assert(instr->num_components == 1); nir_def *offset = v3d_nir_scratch_offset(b, instr); - nir_def *value = instr->src[0].ssa; - - for (int i = 0; i < instr->num_components; i++) { - if (!(nir_intrinsic_write_mask(instr) & (1 << i))) - continue; - - nir_def *chan_offset = - nir_iadd_imm(b, offset, V3D_CHANNELS * i * 4); - - nir_intrinsic_instr *chan_instr = - nir_intrinsic_instr_create(b->shader, instr->intrinsic); - chan_instr->num_components = 1; - - chan_instr->src[0] = nir_src_for_ssa(nir_channel(b, - value, - i)); - chan_instr->src[1] = nir_src_for_ssa(chan_offset); - nir_intrinsic_set_write_mask(chan_instr, 0x1); - nir_intrinsic_set_align(chan_instr, 4, 0); - - nir_builder_instr_insert(b, &chan_instr->instr); - } - - nir_instr_remove(&instr->instr); + nir_src_rewrite(nir_get_io_offset_src(instr), offset); } static bool @@ -121,10 +66,8 @@ v3d_nir_lower_scratch_cb(nir_builder *b, { switch (intr->intrinsic) { case nir_intrinsic_load_scratch: - v3d_nir_lower_load_scratch(b, intr); - return true; case nir_intrinsic_store_scratch: - v3d_nir_lower_store_scratch(b, intr); + v3d_nir_lower_scratch_instr(b, intr); return true; default: return false; diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index a8d2a616f8b..9b190aced36 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -725,13 +725,7 @@ v3d_lower_nir(struct v3d_compile *c) } NIR_PASS(_, c->s, nir_lower_compute_system_values, NULL); - - NIR_PASS(_, c->s, nir_lower_vars_to_scratch, - nir_var_function_temp, - 0, - glsl_get_natural_size_align_bytes); NIR_PASS(_, c->s, nir_lower_is_helper_invocation); - NIR_PASS(_, c->s, v3d_nir_lower_scratch); NIR_PASS(_, c->s, v3d_nir_lower_null_pointers); } @@ -1708,10 +1702,15 @@ v3d_attempt_compile(struct v3d_compile *c) NIR_PASS(_, c->s, nir_lower_robust_access, &opts); } + NIR_PASS(_, c->s, nir_lower_vars_to_scratch, + nir_var_function_temp, + 0, + glsl_get_natural_size_align_bytes); + NIR_PASS(_, c->s, v3d_nir_lower_global_2x32); NIR_PASS(_, c->s, nir_lower_wrmasks, should_split_wrmask, c->s); - NIR_PASS(_, c->s, v3d_nir_lower_load_store_bitsize); + NIR_PASS(_, c->s, v3d_nir_lower_scratch); NIR_PASS(_, c->s, v3d_nir_lower_subgroup_intrinsics, c);