From 18d3e4fecdc33ca83ac358bebe7e3d99b2339d66 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Mon, 5 Dec 2022 17:32:15 +0000 Subject: [PATCH] radv,aco: use ac_nir_lower_legacy_gs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rhys Perry Reviewed-by: Timur Kristóf Part-of: --- .../compiler/aco_instruction_selection.cpp | 104 +----------------- src/amd/vulkan/radv_nir_to_llvm.c | 37 ------- src/amd/vulkan/radv_pipeline.c | 16 ++- 3 files changed, 15 insertions(+), 142 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 4a5dacb11c1..cc01fce66ed 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -7760,102 +7760,6 @@ visit_store_scratch(isel_context* ctx, nir_intrinsic_instr* instr) } } -void -visit_emit_vertex_with_counter(isel_context* ctx, nir_intrinsic_instr* instr) -{ - Builder bld(ctx->program, ctx->block); - - unsigned stream = nir_intrinsic_stream_id(instr); - Temp next_vertex = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa)); - next_vertex = bld.v_mul_imm(bld.def(v1), next_vertex, 4u); - nir_const_value* next_vertex_cv = nir_src_as_const_value(instr->src[0]); - - /* get GSVS ring */ - Temp gsvs_ring = - bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), ctx->program->private_segment_buffer, - Operand::c32(RING_GSVS_GS * 16u)); - - unsigned num_components = ctx->program->info.gs.num_stream_output_components[stream]; - - unsigned stride = 4u * num_components * ctx->shader->info.gs.vertices_out; - unsigned stream_offset = 0; - for (unsigned i = 0; i < stream; i++) { - unsigned prev_stride = 4u * ctx->program->info.gs.num_stream_output_components[i] * - ctx->shader->info.gs.vertices_out; - stream_offset += prev_stride * ctx->program->wave_size; - } - - /* Limit on the stride field for <= GFX7. 
*/ - assert(stride < (1 << 14)); - - Temp gsvs_dwords[4]; - for (unsigned i = 0; i < 4; i++) - gsvs_dwords[i] = bld.tmp(s1); - bld.pseudo(aco_opcode::p_split_vector, Definition(gsvs_dwords[0]), Definition(gsvs_dwords[1]), - Definition(gsvs_dwords[2]), Definition(gsvs_dwords[3]), gsvs_ring); - - if (stream_offset) { - Temp stream_offset_tmp = bld.copy(bld.def(s1), Operand::c32(stream_offset)); - - Temp carry = bld.tmp(s1); - gsvs_dwords[0] = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry)), - gsvs_dwords[0], stream_offset_tmp); - gsvs_dwords[1] = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.def(s1, scc), - gsvs_dwords[1], Operand::zero(), bld.scc(carry)); - } - - gsvs_dwords[1] = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), gsvs_dwords[1], - Operand::c32(S_008F04_STRIDE(stride))); - gsvs_dwords[2] = bld.copy(bld.def(s1), Operand::c32(ctx->program->wave_size)); - - gsvs_ring = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), gsvs_dwords[0], gsvs_dwords[1], - gsvs_dwords[2], gsvs_dwords[3]); - - unsigned offset = 0; - for (unsigned i = 0; i <= VARYING_SLOT_VAR31; i++) { - for (unsigned j = 0; j < 4; j++) { - if (((ctx->program->info.gs.output_streams[i] >> (j * 2)) & 0x3) != stream) - continue; - if (!(ctx->program->info.gs.output_usage_mask[i] & (1 << j))) - continue; - - if (ctx->outputs.mask[i] & (1 << j)) { - Operand vaddr_offset = next_vertex_cv ? Operand(v1) : Operand(next_vertex); - unsigned const_offset = (offset + (next_vertex_cv ? 
next_vertex_cv->u32 : 0u)) * 4u; - if (const_offset >= 4096u) { - if (vaddr_offset.isUndefined()) - vaddr_offset = bld.copy(bld.def(v1), Operand::c32(const_offset / 4096u * 4096u)); - else - vaddr_offset = bld.vadd32(bld.def(v1), Operand::c32(const_offset / 4096u * 4096u), - vaddr_offset); - const_offset %= 4096u; - } - - aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>( - aco_opcode::buffer_store_dword, Format::MUBUF, 4, 0)}; - mubuf->operands[0] = Operand(gsvs_ring); - mubuf->operands[1] = vaddr_offset; - mubuf->operands[2] = Operand(get_arg(ctx, ctx->args->ac.gs2vs_offset)); - mubuf->operands[3] = Operand(ctx->outputs.temps[i * 4u + j]); - mubuf->offen = !vaddr_offset.isUndefined(); - mubuf->offset = const_offset; - mubuf->glc = ctx->program->gfx_level < GFX11; - mubuf->slc = true; - mubuf->sync = memory_sync_info(storage_vmem_output, semantic_can_reorder); - bld.insert(std::move(mubuf)); - } - - offset += ctx->shader->info.gs.vertices_out; - } - - /* outputs for the next vertex are undefined and keeping them around can - * create invalid IR with control flow */ - ctx->outputs.mask[i] = 0; - } - - bld.sopp(aco_opcode::s_sendmsg, bld.m0(ctx->gs_wave_id), -1, sendmsg_gs(false, true, stream)); -} - Temp emit_boolean_reduce(isel_context* ctx, nir_op op, unsigned cluster_size, Temp src) { @@ -9170,7 +9074,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) } case nir_intrinsic_emit_vertex_with_counter: { assert(ctx->stage.hw == HWStage::GS); - visit_emit_vertex_with_counter(ctx, instr); + unsigned stream = nir_intrinsic_stream_id(instr); + bld.sopp(aco_opcode::s_sendmsg, bld.m0(ctx->gs_wave_id), -1, sendmsg_gs(false, true, stream)); break; } case nir_intrinsic_end_primitive_with_counter: { @@ -9181,11 +9086,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) } break; } - case nir_intrinsic_set_vertex_and_primitive_count: { - assert(ctx->stage.hw == HWStage::GS); - /* unused in the legacy pipeline, the HW keeps track of this for us */ - break; - } case
nir_intrinsic_is_subgroup_invocation_lt_amd: { Temp src = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), lanecount_to_mask(ctx, src)); diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 2895da4130e..bb709d58b9d 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -194,44 +194,7 @@ static void visit_emit_vertex_with_counter(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef vertexidx, LLVMValueRef *addrs) { - unsigned offset = 0; struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - - for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { - unsigned output_usage_mask = ctx->shader_info->gs.output_usage_mask[i]; - uint8_t output_stream = ctx->shader_info->gs.output_streams[i]; - LLVMValueRef *out_ptr = &addrs[i * 4]; - bool *is_16bit_ptr = &abi->is_16bit[i * 4]; - int length = util_last_bit(output_usage_mask); - - if (!(ctx->output_mask & (1ull << i))) - continue; - - for (unsigned j = 0; j < length; j++) { - if (((output_stream >> (j * 2)) & 0x3) != stream) - continue; - if (!(output_usage_mask & (1 << j))) - continue; - - LLVMTypeRef type = is_16bit_ptr[j] ? 
ctx->ac.f16 : ctx->ac.f32; - LLVMValueRef out_val = LLVMBuildLoad2(ctx->ac.builder, type, out_ptr[j], ""); - LLVMValueRef voffset = - LLVMConstInt(ctx->ac.i32, offset * ctx->shader->info.gs.vertices_out, false); - - offset++; - - voffset = LLVMBuildAdd(ctx->ac.builder, voffset, vertexidx, ""); - voffset = LLVMBuildMul(ctx->ac.builder, voffset, LLVMConstInt(ctx->ac.i32, 4, false), ""); - - out_val = ac_to_integer(&ctx->ac, out_val); - out_val = LLVMBuildZExtOrBitCast(ctx->ac.builder, out_val, ctx->ac.i32, ""); - - ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring[stream], out_val, NULL, voffset, - ac_get_arg(&ctx->ac, ctx->args->ac.gs2vs_offset), - ac_glc | ac_slc | ac_swizzled); - } - } - ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8), ctx->gs_wave_id); } diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index dfaaf5cffb9..8e1f1a46157 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -3475,9 +3475,19 @@ radv_postprocess_nir(struct radv_pipeline *pipeline, if (lowered_ngg) radv_lower_ngg(device, stage, pipeline_key); - if (stage->stage == last_vgt_api_stage && stage->stage != MESA_SHADER_GEOMETRY && !lowered_ngg) - NIR_PASS_V(stage->nir, ac_nir_lower_legacy_vs, - stage->info.outinfo.export_prim_id ? VARYING_SLOT_PRIMITIVE_ID : -1, false); + if (stage->stage == last_vgt_api_stage && !lowered_ngg) { + if (stage->stage != MESA_SHADER_GEOMETRY) { + NIR_PASS_V(stage->nir, ac_nir_lower_legacy_vs, + stage->info.outinfo.export_prim_id ? VARYING_SLOT_PRIMITIVE_ID : -1, false); + + } else { + ac_nir_gs_output_info gs_out_info = { + .streams = stage->info.gs.output_streams, + .usage_mask = stage->info.gs.output_usage_mask, + }; + NIR_PASS_V(stage->nir, ac_nir_lower_legacy_gs, false, false, &gs_out_info); + } + } NIR_PASS(_, stage->nir, nir_opt_idiv_const, 8);