diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index f1ae876e321..96c89fb174b 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1435,11 +1435,11 @@ load("shared_ir3", [1], [BASE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE]) # src[] = { value, address(vec2 of hi+lo uint32_t), offset }. # const_index[] = { write_mask, align_mul, align_offset } -store("global_ir3", [2, 1], indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET]) +store("global_ir3", [1, 1], indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET]) # src[] = { address(vec2 of hi+lo uint32_t), offset }. # const_index[] = { access, align_mul, align_offset } # the alignment applies to the base address -load("global_ir3", [2, 1], indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET, RANGE_BASE, RANGE], flags=[CAN_ELIMINATE]) +load("global_ir3", [1, 1], indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET, RANGE_BASE, RANGE], flags=[CAN_ELIMINATE]) # Etnaviv-specific load/glboal intrinsics. They take a 32-bit base address and # a 32-bit offset, which doesn't need to be an immediate. @@ -1489,7 +1489,7 @@ intrinsic("copy_ubo_to_uniform_ir3", [1, 1], indices=[BASE, RANGE]) # IR3-specific intrinsic for ldg.k. # base is an offset to apply to the address in bytes, range_base is the # const file base in components, range is the amount to copy in vec4's. -intrinsic("copy_global_to_uniform_ir3", [2], indices=[BASE, RANGE_BASE, RANGE]) +intrinsic("copy_global_to_uniform_ir3", [1], indices=[BASE, RANGE_BASE, RANGE]) # IR3-specific intrinsic for stsc. Loads from push consts to constant file # Should be used in the shader preamble. diff --git a/src/freedreno/ir3/ir3_a6xx.c b/src/freedreno/ir3/ir3_a6xx.c index f4f229653b9..f7158394c29 100644 --- a/src/freedreno/ir3/ir3_a6xx.c +++ b/src/freedreno/ir3/ir3_a6xx.c @@ -416,8 +416,7 @@ emit_intrinsic_load_global_ir3(struct ir3_context *ctx, unsigned dest_components = nir_intrinsic_dest_components(intr); struct ir3_instruction *addr, *offset; - addr = ir3_collect(b, ir3_get_src(ctx, &intr->src[0])[0], - ir3_get_src(ctx, &intr->src[0])[1]); + addr = ir3_collect(b, ir3_get_src(ctx, &intr->src[0])[0]); struct ir3_instruction *load; @@ -459,8 +458,7 @@ emit_intrinsic_store_global_ir3(struct ir3_context *ctx, struct ir3_instruction *value, *addr, *offset; unsigned ncomp = nir_intrinsic_src_components(intr, 0); - addr = ir3_collect(b, ir3_get_src(ctx, &intr->src[1])[0], - ir3_get_src(ctx, &intr->src[1])[1]); + addr = ir3_collect(b, ir3_get_src(ctx, &intr->src[1])[0]); value = ir3_create_collect(b, ir3_get_src(ctx, &intr->src[0]), ncomp); @@ -507,8 +505,7 @@ emit_intrinsic_atomic_global(struct ir3_context *ctx, nir_intrinsic_instr *intr) type = TYPE_ATOMIC_U64; } - addr = ir3_collect(b, ir3_get_src(ctx, &intr->src[0])[0], - ir3_get_src(ctx, &intr->src[0])[1]); + addr = ir3_collect(b, ir3_get_src(ctx, &intr->src[0])[0]); if (op == nir_atomic_op_cmpxchg) { struct ir3_instruction *compare = ir3_get_src(ctx, &intr->src[2])[0]; diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 39c9704e4af..f56692e9b73 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -1246,9 +1246,8 @@ emit_intrinsic_copy_global_to_uniform(struct ir3_context *ctx, if (dst_hi) a1 = ir3_create_addr1(&ctx->build, dst_hi << 8); - struct ir3_instruction *addr_lo = ir3_get_src(ctx, &intr->src[0])[0]; - struct ir3_instruction *addr_hi = ir3_get_src(ctx, &intr->src[0])[1]; - struct ir3_instruction *addr = ir3_collect(b, addr_lo, addr_hi); + struct ir3_instruction *addr = + ir3_collect(b, ir3_get_src(ctx, &intr->src[0])[0]); struct ir3_instruction *ldg = ir3_LDG_K(b, create_immed(b, dst_lo), 0, addr, 0, create_immed(b, addr_offset), 0, create_immed(b, size), 0); @@ -3268,8 +3267,8 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) case nir_intrinsic_bindless_resource_ir3: dst[0] = ir3_get_src(ctx, &intr->src[0])[0]; break; - case nir_intrinsic_global_atomic_ir3: - case nir_intrinsic_global_atomic_swap_ir3: { + case nir_intrinsic_global_atomic: + case nir_intrinsic_global_atomic_swap: { dst[0] = ctx->funcs->emit_intrinsic_atomic_global(ctx, intr); break; } diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 253bea87b7e..c77a6e31051 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -569,8 +569,7 @@ lower_shader_clock(struct nir_builder *b, nir_intrinsic_instr *instr, void *data nir_push_if(b, nir_elect(b, 1)); { /* ALWAYSON counter is mapped to this address. */ - nir_def *base_addr = - nir_unpack_64_2x32(b, nir_imm_int64(b, uche_trap_base)); + nir_def *base_addr = nir_imm_int64(b, uche_trap_base); /* Reading _LO first presumably latches _HI making the read atomic. */ nir_def *clock_lo = nir_load_global_ir3(b, 1, 32, base_addr, nir_imm_int(b, 0)); diff --git a/src/freedreno/ir3/ir3_nir_lower_64b.c b/src/freedreno/ir3/ir3_nir_lower_64b.c index 443f8c194ab..6460a236e7b 100644 --- a/src/freedreno/ir3/ir3_nir_lower_64b.c +++ b/src/freedreno/ir3/ir3_nir_lower_64b.c @@ -36,8 +36,8 @@ lower_64b_intrinsics_filter(const nir_instr *instr, const void *unused) /* skip over ssbo atomics, we'll lower them later */ if (intr->intrinsic == nir_intrinsic_ssbo_atomic || intr->intrinsic == nir_intrinsic_ssbo_atomic_swap || - intr->intrinsic == nir_intrinsic_global_atomic_ir3 || - intr->intrinsic == nir_intrinsic_global_atomic_swap_ir3) + intr->intrinsic == nir_intrinsic_global_atomic || + intr->intrinsic == nir_intrinsic_global_atomic_swap) return false; if (nir_intrinsic_dest_components(intr) == 0) @@ -226,8 +226,6 @@ lower_64b_global_filter(const nir_instr *instr, const void *unused) case nir_intrinsic_load_global: case nir_intrinsic_load_global_constant: case nir_intrinsic_store_global: - case nir_intrinsic_global_atomic: - case nir_intrinsic_global_atomic_swap: return true; default: return false; @@ -242,26 +240,13 @@ lower_64b_global(nir_builder *b, nir_instr *instr, void *unused) nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); bool load = intr->intrinsic != nir_intrinsic_store_global; - nir_def *addr64 = intr->src[load ? 0 : 1].ssa; - nir_def *addr = nir_unpack_64_2x32(b, addr64); + nir_def *addr = intr->src[load ? 0 : 1].ssa; /* * Note that we can get vec8/vec16 with OpenCL.. we need to split * those up into max 4 components per load/store. */ - if (intr->intrinsic == nir_intrinsic_global_atomic) { - return nir_global_atomic_ir3( - b, intr->def.bit_size, addr, - intr->src[1].ssa, - .atomic_op = nir_intrinsic_atomic_op(intr)); - } else if (intr->intrinsic == nir_intrinsic_global_atomic_swap) { - return nir_global_atomic_swap_ir3( - b, intr->def.bit_size, addr, - intr->src[1].ssa, intr->src[2].ssa, - .atomic_op = nir_intrinsic_atomic_op(intr)); - } - enum gl_access_qualifier access = nir_intrinsic_access(intr); if (load) { diff --git a/src/freedreno/ir3/ir3_nir_lower_tess.c b/src/freedreno/ir3/ir3_nir_lower_tess.c index 17714afdfb2..a3508822f2d 100644 --- a/src/freedreno/ir3/ir3_nir_lower_tess.c +++ b/src/freedreno/ir3/ir3_nir_lower_tess.c @@ -55,6 +55,18 @@ build_local_primitive_id(nir_builder *b, struct state *state) 63); } +static nir_def * +load_tess_param_base(nir_builder *b) +{ + return nir_pack_64_2x32(b, nir_load_tess_param_base_ir3(b)); +} + +static nir_def * +load_tess_factor_base(nir_builder *b) +{ + return nir_pack_64_2x32(b, nir_load_tess_factor_base_ir3(b)); +} + static bool is_tess_levels(gl_varying_slot slot) { @@ -517,7 +529,7 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state) b->cursor = nir_before_instr(&intr->instr); - nir_def *address = nir_load_tess_param_base_ir3(b); + nir_def *address = load_tess_param_base(b); nir_def *offset = build_per_vertex_offset( b, state, intr->src[0].ssa, nir_intrinsic_io_semantics(intr).location, @@ -538,7 +550,7 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state) util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1)); nir_def *value = intr->src[0].ssa; - nir_def *address = nir_load_tess_param_base_ir3(b); + nir_def *address = load_tess_param_base(b); nir_def *offset = build_per_vertex_offset( b, state, intr->src[1].ssa, nir_intrinsic_io_semantics(intr).location, @@ -565,11 +577,11 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state) gl_varying_slot location = nir_intrinsic_io_semantics(intr).location; if (is_tess_levels(location)) { assert(intr->def.num_components == 1); - address = nir_load_tess_factor_base_ir3(b); + address = load_tess_factor_base(b); offset = build_tessfactor_base( b, location, nir_intrinsic_component(intr), state); } else { - address = nir_load_tess_param_base_ir3(b); + address = load_tess_param_base(b); offset = build_patch_offset(b, state, location, nir_intrinsic_component(intr), intr->src[0].ssa); @@ -620,14 +632,14 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state) replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, intr->src[0].ssa, - nir_load_tess_factor_base_ir3(b), + load_tess_factor_base(b), nir_iadd(b, intr->src[1].ssa, offset)); if (location != VARYING_SLOT_PRIMITIVE_ID) { nir_pop_if(b, nif); } } else { - nir_def *address = nir_load_tess_param_base_ir3(b); + nir_def *address = load_tess_param_base(b); nir_def *offset = build_patch_offset( b, state, location, nir_intrinsic_component(intr), intr->src[1].ssa); @@ -729,7 +741,7 @@ lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state) b->cursor = nir_before_instr(&intr->instr); - nir_def *address = nir_load_tess_param_base_ir3(b); + nir_def *address = load_tess_param_base(b); nir_def *offset = build_per_vertex_offset( b, state, intr->src[0].ssa, nir_intrinsic_io_semantics(intr).location, @@ -755,11 +767,11 @@ lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state) gl_varying_slot location = nir_intrinsic_io_semantics(intr).location; if (is_tess_levels(location)) { assert(intr->def.num_components == 1); - address = nir_load_tess_factor_base_ir3(b); + address = load_tess_factor_base(b); offset = build_tessfactor_base( b, location, nir_intrinsic_component(intr), state); } else { - address = nir_load_tess_param_base_ir3(b); + address = load_tess_param_base(b); offset = build_patch_offset(b, state, location, nir_intrinsic_component(intr), intr->src[0].ssa); diff --git a/src/freedreno/vulkan/tu_nir_lower_ray_query.cc b/src/freedreno/vulkan/tu_nir_lower_ray_query.cc index 0499ee52bfa..c4bcbe05e45 100644 --- a/src/freedreno/vulkan/tu_nir_lower_ray_query.cc +++ b/src/freedreno/vulkan/tu_nir_lower_ray_query.cc @@ -262,7 +262,7 @@ load_tlas(nir_builder *b, nir_def *tlas, .align_offset = offset); } else { return nir_load_global_ir3(b, components, 32, - tlas, + nir_pack_64_2x32(b, tlas), nir_iadd_imm(b, nir_imul_imm(b, index, AS_RECORD_SIZE / 4), offset / 4), /* The required alignment of the diff --git a/src/freedreno/vulkan/tu_shader.cc b/src/freedreno/vulkan/tu_shader.cc index 64bc9c0eabb..541e5e5a26f 100644 --- a/src/freedreno/vulkan/tu_shader.cc +++ b/src/freedreno/vulkan/tu_shader.cc @@ -742,7 +742,8 @@ lower_inline_ubo(nir_builder *b, nir_intrinsic_instr *intrin, void *cb_data) } val = nir_load_global_ir3(b, intrin->num_components, intrin->def.bit_size, - base_addr, nir_ishr_imm(b, offset, 2), + nir_pack_64_2x32(b, base_addr), + nir_ishr_imm(b, offset, 2), .access = (enum gl_access_qualifier)( (enum gl_access_qualifier)(ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER) |