aco: Disable MUBUF/MTBUF offsets when they are zero.
Fossil DB stats on Rembrandt (GFX10.3):

Totals from 1264 (0.94% of 134920) affected shaders:
VGPRs: 69504 -> 69336 (-0.24%)
CodeSize: 6885468 -> 6886224 (+0.01%); split: -0.02%, +0.03%
MaxWaves: 24632 -> 24670 (+0.15%)
Instrs: 1287027 -> 1287209 (+0.01%); split: -0.04%, +0.05%
Latency: 6830411 -> 6831165 (+0.01%); split: -0.06%, +0.07%
InvThroughput: 1220643 -> 1220438 (-0.02%); split: -0.04%, +0.02%
VClause: 24737 -> 24751 (+0.06%); split: -0.25%, +0.30%
SClause: 42774 -> 42911 (+0.32%); split: -0.13%, +0.45%
Copies: 75408 -> 75600 (+0.25%); split: -0.62%, +0.88%
PreVGPRs: 60544 -> 59809 (-1.21%)

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21930>
This commit is contained in:
@@ -4088,6 +4088,8 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info,
|
||||
Temp offset_tmp = offset.isTemp() ? offset.getTemp() : Temp();
|
||||
if (offset.isConstant()) {
|
||||
offset = Operand::c32(offset.constantValue() + to_add);
|
||||
} else if (offset.isUndefined()) {
|
||||
offset = Operand::c32(to_add);
|
||||
} else if (offset_tmp.regClass() == s1) {
|
||||
offset = bld.sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc), offset_tmp,
|
||||
Operand::c32(to_add));
|
||||
@@ -4122,6 +4124,8 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info,
|
||||
Temp offset_tmp = offset.isTemp() ? offset.getTemp() : Temp();
|
||||
if (offset.isConstant()) {
|
||||
aligned_offset = Operand::c32(offset.constantValue() & 0xfffffffcu);
|
||||
} else if (offset.isUndefined()) {
|
||||
aligned_offset = Operand::zero();
|
||||
} else if (offset_tmp.regClass() == s1) {
|
||||
aligned_offset = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc),
|
||||
Operand::c32(0xfffffffcu), offset_tmp);
|
||||
@@ -4139,7 +4143,8 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info,
|
||||
}
|
||||
}
|
||||
Temp aligned_offset_tmp =
|
||||
aligned_offset.isTemp() ? aligned_offset.getTemp() : bld.copy(bld.def(s1), aligned_offset);
|
||||
aligned_offset.isTemp() ? aligned_offset.getTemp() :
|
||||
aligned_offset.isConstant() ? bld.copy(bld.def(s1), aligned_offset) : Temp(0, s1);
|
||||
|
||||
Temp val = params.callback(bld, info, aligned_offset_tmp, bytes_needed, align,
|
||||
reduced_const_offset, byte_align ? Temp() : info.dst);
|
||||
@@ -4157,6 +4162,8 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info,
|
||||
if (byte_align == -1) {
|
||||
if (offset.isConstant())
|
||||
byte_align_off = Operand::c32(offset.constantValue() % 4u);
|
||||
else if (offset.isUndefined())
|
||||
byte_align_off = Operand::zero();
|
||||
else if (offset.size() == 2)
|
||||
byte_align_off = Operand(emit_extract_vector(ctx, offset.getTemp(), 0,
|
||||
RegClass(offset.getTemp().type(), 1)));
|
||||
@@ -7032,11 +7039,13 @@ visit_load_buffer(isel_context* ctx, nir_intrinsic_instr* intrin)
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
|
||||
bool idxen = !nir_src_is_const(intrin->src[3]) || nir_src_as_uint(intrin->src[3]);
|
||||
bool v_offset_zero = nir_src_is_const(intrin->src[1]) && !nir_src_as_uint(intrin->src[1]);
|
||||
bool s_offset_zero = nir_src_is_const(intrin->src[2]) && !nir_src_as_uint(intrin->src[2]);
|
||||
|
||||
Temp dst = get_ssa_temp(ctx, &intrin->dest.ssa);
|
||||
Temp descriptor = bld.as_uniform(get_ssa_temp(ctx, intrin->src[0].ssa));
|
||||
Temp v_offset = as_vgpr(ctx, get_ssa_temp(ctx, intrin->src[1].ssa));
|
||||
Temp v_offset =
|
||||
v_offset_zero ? Temp(0, v1) : as_vgpr(ctx, get_ssa_temp(ctx, intrin->src[1].ssa));
|
||||
Temp s_offset =
|
||||
s_offset_zero ? Temp(0, s1) : bld.as_uniform(get_ssa_temp(ctx, intrin->src[2].ssa));
|
||||
Temp idx = idxen ? as_vgpr(ctx, get_ssa_temp(ctx, intrin->src[3].ssa)) : Temp();
|
||||
@@ -7100,11 +7109,15 @@ visit_store_buffer(isel_context* ctx, nir_intrinsic_instr* intrin)
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
|
||||
bool idxen = !nir_src_is_const(intrin->src[4]) || nir_src_as_uint(intrin->src[4]);
|
||||
bool v_offset_zero = nir_src_is_const(intrin->src[2]) && !nir_src_as_uint(intrin->src[2]);
|
||||
bool s_offset_zero = nir_src_is_const(intrin->src[3]) && !nir_src_as_uint(intrin->src[3]);
|
||||
|
||||
Temp store_src = get_ssa_temp(ctx, intrin->src[0].ssa);
|
||||
Temp descriptor = bld.as_uniform(get_ssa_temp(ctx, intrin->src[1].ssa));
|
||||
Temp v_offset = as_vgpr(ctx, get_ssa_temp(ctx, intrin->src[2].ssa));
|
||||
Temp s_offset = bld.as_uniform(get_ssa_temp(ctx, intrin->src[3].ssa));
|
||||
Temp v_offset =
|
||||
v_offset_zero ? Temp(0, v1) : as_vgpr(ctx, get_ssa_temp(ctx, intrin->src[2].ssa));
|
||||
Temp s_offset =
|
||||
s_offset_zero ? Temp(0, s1) : bld.as_uniform(get_ssa_temp(ctx, intrin->src[3].ssa));
|
||||
Temp idx = idxen ? as_vgpr(ctx, get_ssa_temp(ctx, intrin->src[4].ssa)) : Temp();
|
||||
|
||||
bool swizzled = nir_intrinsic_access(intrin) & ACCESS_IS_SWIZZLED_AMD;
|
||||
|
||||
Reference in New Issue
Block a user