diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c index eb91f6620a4..e892193b2c4 100644 --- a/src/amd/llvm/ac_llvm_build.c +++ b/src/amd/llvm/ac_llvm_build.c @@ -1153,22 +1153,24 @@ void ac_build_buffer_store_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc /* buffer_store_dword(,x2,x3,x4) <- the suffix is selected by the type of vdata. */ void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata, LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset, - unsigned inst_offset, unsigned cache_policy) + unsigned cache_policy) { unsigned num_channels = ac_get_llvm_num_components(vdata); /* Split 3 channel stores if unsupported. */ if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class, false)) { - LLVMValueRef v[3], v01; + LLVMValueRef v[3], v01, voffset2; for (int i = 0; i < 3; i++) { v[i] = LLVMBuildExtractElement(ctx->builder, vdata, LLVMConstInt(ctx->i32, i, 0), ""); } v01 = ac_build_gather_values(ctx, v, 2); - ac_build_buffer_store_dword(ctx, rsrc, v01, vindex, voffset, soffset, inst_offset, cache_policy); - ac_build_buffer_store_dword(ctx, rsrc, v[2], vindex, voffset, soffset, inst_offset + 8, - cache_policy); + voffset2 = LLVMBuildAdd(ctx->builder, voffset ? voffset : ctx->i32_0, + LLVMConstInt(ctx->i32, 8, 0), ""); + + ac_build_buffer_store_dword(ctx, rsrc, v01, vindex, voffset, soffset, cache_policy); + ac_build_buffer_store_dword(ctx, rsrc, v[2], vindex, voffset2, soffset, cache_policy); return; } @@ -1177,12 +1179,7 @@ void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc, * llvm.amdgcn.buffer.store doesn't have a separate soffset parameter. */ if (!(cache_policy & ac_swizzled)) { - LLVMValueRef offset = soffset; - - if (inst_offset) - offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, inst_offset, 0), ""); - - ac_build_buffer_store_common(ctx, rsrc, ac_to_float(ctx, vdata), vindex, voffset, offset, + ac_build_buffer_store_common(ctx, rsrc, ac_to_float(ctx, vdata), vindex, voffset, soffset, cache_policy, false); return; } @@ -1192,8 +1189,6 @@ void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc, V_008F0C_BUF_DATA_FORMAT_32_32_32_32}; unsigned dfmt = dfmts[num_channels - 1]; unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT; - LLVMValueRef immoffset = LLVMConstInt(ctx->i32, inst_offset, 0); - voffset = LLVMBuildAdd(ctx->builder, voffset ? voffset : ctx->i32_0, immoffset, ""); ac_build_tbuffer_store(ctx, rsrc, vdata, vindex, voffset, soffset, num_channels, dfmt, nfmt, cache_policy); diff --git a/src/amd/llvm/ac_llvm_build.h b/src/amd/llvm/ac_llvm_build.h index 89ebf23edb8..cf0e3f07c45 100644 --- a/src/amd/llvm/ac_llvm_build.h +++ b/src/amd/llvm/ac_llvm_build.h @@ -256,7 +256,7 @@ LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx, void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata, LLVMValueRef vindex, LLVMValueRef voffset, LLVMValueRef soffset, - unsigned inst_offset, unsigned cache_policy); + unsigned cache_policy); void ac_build_buffer_store_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef data, LLVMValueRef vindex, LLVMValueRef voffset, unsigned cache_policy); diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index e5d468cc93c..d8cc4e6786a 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -1887,7 +1887,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *in data = LLVMBuildBitCast(ctx->ac.builder, data, data_type, ""); ac_build_buffer_store_dword(&ctx->ac, rsrc, data, NULL, offset, - ctx->ac.i32_0, 0, cache_policy); + ctx->ac.i32_0, cache_policy); } } @@ -4288,8 +4288,9 @@ static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins if (slc) cache_policy |= ac_slc; - ac_build_buffer_store_dword(&ctx->ac, descriptor, store_data, - NULL, addr_voffset, addr_soffset, const_offset, + LLVMValueRef voffset = LLVMBuildAdd(ctx->ac.builder, addr_voffset, + LLVMConstInt(ctx->ac.i32, const_offset, 0), ""); + ac_build_buffer_store_dword(&ctx->ac, descriptor, store_data, NULL, voffset, addr_soffset, cache_policy); break; } diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 75de585cee2..01e8f0c9b29 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -280,7 +280,7 @@ visit_emit_vertex_with_counter(struct ac_shader_abi *abi, unsigned stream, LLVMV out_val = LLVMBuildZExtOrBitCast(ctx->ac.builder, out_val, ctx->ac.i32, ""); ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring[stream], out_val, NULL, voffset, - ac_get_arg(&ctx->ac, ctx->args->ac.gs2vs_offset), 0, + ac_get_arg(&ctx->ac, ctx->args->ac.gs2vs_offset), ac_glc | ac_slc | ac_swizzled); } } @@ -834,8 +834,10 @@ radv_emit_stream_output(struct radv_shader_context *ctx, LLVMValueRef const *so_ break; } - ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf], vdata, NULL, - so_write_offsets[buf], ctx->ac.i32_0, offset, ac_glc | ac_slc); + LLVMValueRef voffset = LLVMBuildAdd(ctx->ac.builder, so_write_offsets[buf], + LLVMConstInt(ctx->ac.i32, offset, 0), ""); + ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf], vdata, NULL, voffset, ctx->ac.i32_0, + ac_glc | ac_slc); } static void diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c index 0bde0d99259..7655223e7cc 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c @@ -177,9 +177,10 @@ void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi) continue; } - ac_build_buffer_store_dword(&ctx->ac, ctx->esgs_ring, out_val, NULL, NULL, + ac_build_buffer_store_dword(&ctx->ac, ctx->esgs_ring, out_val, NULL, + LLVMConstInt(ctx->ac.i32, (4 * param + chan) * 4, 0), ac_get_arg(&ctx->ac, ctx->args.es2gs_offset), - (4 * param + chan) * 4, ac_glc | ac_slc | ac_swizzled); + ac_glc | ac_slc | ac_swizzled); } } @@ -279,7 +280,7 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVM out_val = ac_to_integer(&ctx->ac, out_val); ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring[stream], out_val, NULL, - voffset, soffset, 0, ac_glc | ac_slc | ac_swizzled); + voffset, soffset, ac_glc | ac_slc | ac_swizzled); } } diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c index 18b203604c8..4e52f488296 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c @@ -537,8 +537,9 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi, values[chan] = value; if (writemask != 0xF && !is_tess_factor) { - ac_build_buffer_store_dword(&ctx->ac, buffer, value, NULL, addr, base, - 4 * chan, ac_glc); + LLVMValueRef voffset = LLVMBuildAdd(ctx->ac.builder, addr, + LLVMConstInt(ctx->ac.i32, 4 * chan, 0), ""); + ac_build_buffer_store_dword(&ctx->ac, buffer, value, NULL, voffset, base, ac_glc); } /* Write tess factors into VGPRs for the epilog. */ @@ -555,7 +556,7 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi, if (writemask == 0xF && !is_tess_factor) { LLVMValueRef value = ac_build_gather_values(&ctx->ac, values, 4); - ac_build_buffer_store_dword(&ctx->ac, buffer, value, NULL, addr, base, 0, ac_glc); + ac_build_buffer_store_dword(&ctx->ac, buffer, value, NULL, addr, base, ac_glc); } } @@ -662,7 +663,7 @@ static void si_copy_tcs_inputs(struct si_shader_context *ctx) LLVMValueRef value = lshs_lds_load(ctx, ctx->ac.i32, ~0, lds_ptr); - ac_build_buffer_store_dword(&ctx->ac, buffer, value, NULL, buffer_addr, buffer_offset, 0, + ac_build_buffer_store_dword(&ctx->ac, buffer, value, NULL, buffer_addr, buffer_offset, ac_glc); } } @@ -775,18 +776,22 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re ac_build_ifcc(&ctx->ac, LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, rel_patch_id, ctx->ac.i32_0, ""), 6504); ac_build_buffer_store_dword(&ctx->ac, buffer, LLVMConstInt(ctx->ac.i32, 0x80000000, 0), - NULL, ctx->ac.i32_0, tf_base, offset, ac_glc); + NULL, LLVMConstInt(ctx->ac.i32, offset, 0), tf_base, ac_glc); ac_build_endif(&ctx->ac, 6504); offset += 4; } /* Store the tessellation factors. */ - ac_build_buffer_store_dword(&ctx->ac, buffer, vec0, NULL, byteoffset, - tf_base, offset, ac_glc); + ac_build_buffer_store_dword(&ctx->ac, buffer, vec0, NULL, + LLVMBuildAdd(ctx->ac.builder, byteoffset, + LLVMConstInt(ctx->ac.i32, offset, 0), ""), + tf_base, ac_glc); offset += 16; if (vec1) - ac_build_buffer_store_dword(&ctx->ac, buffer, vec1, NULL, byteoffset, - tf_base, offset, ac_glc); + ac_build_buffer_store_dword(&ctx->ac, buffer, vec1, NULL, + LLVMBuildAdd(ctx->ac.builder, byteoffset, + LLVMConstInt(ctx->ac.i32, offset, 0), ""), + tf_base, ac_glc); /* Store the tess factors into the offchip buffer if TES reads them. */ if (shader->key.ge.part.tcs.epilog.tes_reads_tess_factors) { @@ -804,7 +809,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re outer_vec = ac_build_gather_values(&ctx->ac, outer, outer_comps); ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec, NULL, tf_outer_offset, - base, 0, ac_glc); + base, ac_glc); if (inner_comps) { param_inner = si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_INNER); tf_inner_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL, @@ -812,7 +817,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re inner_vec = ac_build_gather_values(&ctx->ac, inner, inner_comps); ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec, NULL, - tf_inner_offset, base, 0, ac_glc); + tf_inner_offset, base, ac_glc); } } diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c index b54fc86ed2e..90b169ff198 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c @@ -304,8 +304,9 @@ void si_llvm_streamout_store_output(struct si_shader_context *ctx, LLVMValueRef } ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf_idx], vdata, NULL, - so_write_offsets[buf_idx], ctx->ac.i32_0, stream_out->dst_offset * 4, - ac_glc | ac_slc); + LLVMBuildAdd(ctx->ac.builder, so_write_offsets[buf_idx], + LLVMConstInt(ctx->ac.i32, stream_out->dst_offset * 4, 0), ""), + ctx->ac.i32_0, ac_glc | ac_slc); } /**