aco/gfx9+: use d16 global/scratch/buffer loads
Full register loads are not necessary and prevent packing optimizations. Global/Scratch is GFX9+, so D16 loads are always supported. We already used LDS D16 loads. Foz-DB Navi31 (mostly RA noise): Totals from 716 (0.90% of 79789) affected shaders: Instrs: 3854176 -> 3854238 (+0.00%); split: -0.00%, +0.00% CodeSize: 20034440 -> 20035220 (+0.00%); split: -0.00%, +0.00% Latency: 24410951 -> 24411120 (+0.00%) InvThroughput: 5181276 -> 5181301 (+0.00%) Copies: 320258 -> 320317 (+0.02%) VALU: 2207307 -> 2207366 (+0.00%) Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34346>
This commit is contained in:
@@ -4587,10 +4587,12 @@ mubuf_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigne
|
||||
aco_opcode op;
|
||||
if (bytes_needed == 1 || align_ % 2) {
|
||||
bytes_size = 1;
|
||||
op = aco_opcode::buffer_load_ubyte;
|
||||
op = bld.program->gfx_level >= GFX9 ? aco_opcode::buffer_load_ubyte_d16
|
||||
: aco_opcode::buffer_load_ubyte;
|
||||
} else if (bytes_needed == 2 || align_ % 4) {
|
||||
bytes_size = 2;
|
||||
op = aco_opcode::buffer_load_ushort;
|
||||
op = bld.program->gfx_level >= GFX9 ? aco_opcode::buffer_load_short_d16
|
||||
: aco_opcode::buffer_load_ushort;
|
||||
} else if (bytes_needed <= 4) {
|
||||
bytes_size = 4;
|
||||
op = aco_opcode::buffer_load_dword;
|
||||
@@ -4695,10 +4697,10 @@ scratch_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsig
|
||||
aco_opcode op;
|
||||
if (bytes_needed == 1 || align_ % 2u) {
|
||||
bytes_size = 1;
|
||||
op = aco_opcode::scratch_load_ubyte;
|
||||
op = aco_opcode::scratch_load_ubyte_d16;
|
||||
} else if (bytes_needed == 2 || align_ % 4u) {
|
||||
bytes_size = 2;
|
||||
op = aco_opcode::scratch_load_ushort;
|
||||
op = aco_opcode::scratch_load_short_d16;
|
||||
} else if (bytes_needed <= 4) {
|
||||
bytes_size = 4;
|
||||
op = aco_opcode::scratch_load_dword;
|
||||
@@ -4849,12 +4851,12 @@ global_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsign
|
||||
if (bytes_needed == 1 || align_ % 2u) {
|
||||
bytes_size = 1;
|
||||
op = use_mubuf ? aco_opcode::buffer_load_ubyte
|
||||
: global ? aco_opcode::global_load_ubyte
|
||||
: global ? aco_opcode::global_load_ubyte_d16
|
||||
: aco_opcode::flat_load_ubyte;
|
||||
} else if (bytes_needed == 2 || align_ % 4u) {
|
||||
bytes_size = 2;
|
||||
op = use_mubuf ? aco_opcode::buffer_load_ushort
|
||||
: global ? aco_opcode::global_load_ushort
|
||||
: global ? aco_opcode::global_load_short_d16
|
||||
: aco_opcode::flat_load_ushort;
|
||||
} else if (bytes_needed <= 4) {
|
||||
bytes_size = 4;
|
||||
|
||||
Reference in New Issue
Block a user