nir,ac/llvm,radeonsi: replace nir_buffer_atomic_add_amd with ssbo atomic
Now that radeonsi supports passing a descriptor to SSBO atomic ops, we can use the SSBO atomic instead. ACO does not implement nir_buffer_atomic_add_amd either. Reviewed-by: Marek Olšák <marek.olsak@amd.com> Signed-off-by: Qiang Yu <yuq825@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23096>
This commit is contained in:
@@ -3746,29 +3746,6 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
|
||||
ac_build_atomic_rmw(&ctx->ac, LLVMAtomicRMWBinOpAdd, gds_base, store_val, "workgroup-one-as");
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_buffer_atomic_add_amd: {
|
||||
LLVMValueRef desc = get_src(ctx, instr->src[0]);
|
||||
LLVMValueRef data = get_src(ctx, instr->src[1]);
|
||||
unsigned base = nir_intrinsic_base(instr);
|
||||
LLVMTypeRef return_type = LLVMTypeOf(data);
|
||||
unsigned cache_flags =
|
||||
ac_get_hw_cache_flags(ctx->ac.gfx_level,
|
||||
ac_get_mem_access_flags(instr) | ACCESS_TYPE_ATOMIC).value;
|
||||
|
||||
LLVMValueRef args[] = {
|
||||
data, desc,
|
||||
LLVMConstInt(ctx->ac.i32, base, false),
|
||||
ctx->ac.i32_0, /* soffset */
|
||||
LLVMConstInt(ctx->ac.i32, cache_flags, 0),
|
||||
};
|
||||
|
||||
char name[64], type[8];
|
||||
ac_build_type_name_for_intr(return_type, type, sizeof(type));
|
||||
snprintf(name, sizeof(name), "llvm.amdgcn.raw.buffer.atomic.add.%s", type);
|
||||
|
||||
result = ac_build_intrinsic(&ctx->ac, name, return_type, args, 5, 0);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_elect:
|
||||
result = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, visit_first_invocation(ctx),
|
||||
ac_get_thread_id(&ctx->ac), "");
|
||||
|
||||
@@ -558,7 +558,6 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
|
||||
case nir_intrinsic_load_packed_passthrough_primitive_amd:
|
||||
case nir_intrinsic_load_initial_edgeflags_amd:
|
||||
case nir_intrinsic_gds_atomic_add_amd:
|
||||
case nir_intrinsic_buffer_atomic_add_amd:
|
||||
case nir_intrinsic_load_rt_arg_scratch_offset_amd:
|
||||
case nir_intrinsic_load_intersection_opaque_amd:
|
||||
case nir_intrinsic_load_vector_arg_amd:
|
||||
|
||||
@@ -1363,9 +1363,6 @@ store("global_amd", [1, 1], indices=[BASE, ACCESS, ALIGN_MUL, ALIGN_OFFSET, WRIT
|
||||
# Same as shared_atomic_add, but with GDS. src[] = {store_val, gds_addr, m0}
|
||||
intrinsic("gds_atomic_add_amd", src_comp=[1, 1, 1], dest_comp=1, indices=[BASE])
|
||||
|
||||
# src[] = { descriptor, add_value }
|
||||
intrinsic("buffer_atomic_add_amd", src_comp=[4, 1], dest_comp=1, indices=[BASE])
|
||||
|
||||
# src[] = { sample_id, num_samples }
|
||||
intrinsic("load_sample_positions_amd", src_comp=[1, 1], dest_comp=2, flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
|
||||
|
||||
@@ -401,7 +401,8 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
|
||||
unsigned offset = si_query_pipestat_end_dw_offset(sel->screen, index) * 4;
|
||||
|
||||
nir_ssa_def *count = intrin->src[0].ssa;
|
||||
nir_buffer_atomic_add_amd(b, 32, buf, count, .base = offset);
|
||||
nir_ssbo_atomic(b, 32, buf, nir_imm_int(b, offset), count,
|
||||
.atomic_op = nir_atomic_op_iadd);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_atomic_add_gen_prim_count_amd:
|
||||
@@ -414,7 +415,8 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
|
||||
offsetof(struct gfx10_sh_query_buffer_mem, stream[stream].emitted_primitives);
|
||||
|
||||
nir_ssa_def *prim_count = intrin->src[0].ssa;
|
||||
nir_buffer_atomic_add_amd(b, 32, buf, prim_count, .base = offset);
|
||||
nir_ssbo_atomic(b, 32, buf, nir_imm_int(b, offset), prim_count,
|
||||
.atomic_op = nir_atomic_op_iadd);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_ring_attr_amd:
|
||||
|
||||
Reference in New Issue
Block a user