diff --git a/src/asahi/compiler/agx_builder.h.py b/src/asahi/compiler/agx_builder.h.py
index 31f61ebc64e..79f7bbd77a8 100644
--- a/src/asahi/compiler/agx_builder.h.py
+++ b/src/asahi/compiler/agx_builder.h.py
@@ -50,6 +50,10 @@ agx_${opcode}${suffix}(agx_builder *b
    , agx_index dst${dest}
 % endfor
 
+% if op.variable_srcs:
+   , unsigned nr_srcs
+% endif
+
 % for src in range(srcs):
    , agx_index src${src}
 % endfor
@@ -65,7 +69,10 @@ agx_${opcode}${suffix}(agx_builder *b
    I->dest[${dest}] = dst${dest};
 % endfor
 
-% if srcs > 0:
+% if op.variable_srcs:
+   I->src = ralloc_array(I, agx_index, nr_srcs);
+   I->nr_srcs = nr_srcs;
+% elif srcs > 0:
    I->src = ralloc_array(I, agx_index, ${srcs});
    I->nr_srcs = ${srcs};
 
@@ -82,7 +89,7 @@ agx_${opcode}${suffix}(agx_builder *b
    return I;
 }
 
-% if dests == 1:
+% if dests == 1 and not op.variable_srcs:
 static inline agx_index
 agx_${opcode}(agx_builder *b
 
diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c
index 4127984616a..0a94c7d3672 100644
--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c
@@ -120,16 +120,14 @@ agx_emit_extract(agx_builder *b, agx_index vec, unsigned channel)
 }
 
 static void
-agx_cache_combine(agx_builder *b, agx_index dst,
-                  agx_index s0, agx_index s1, agx_index s2, agx_index s3)
+agx_cache_combine(agx_builder *b, agx_index dst, unsigned nr_srcs,
+                  agx_index *srcs)
 {
    /* Lifetime of a hash table entry has to be at least as long as the table */
-   agx_index *channels = ralloc_array(b->shader, agx_index, 4);
+   agx_index *channels = ralloc_array(b->shader, agx_index, nr_srcs);
 
-   channels[0] = s0;
-   channels[1] = s1;
-   channels[2] = s2;
-   channels[3] = s3;
+   for (unsigned i = 0; i < nr_srcs; ++i)
+      channels[i] = srcs[i];
 
    _mesa_hash_table_u64_insert(b->shader->allocated_vec, agx_index_to_key(dst),
                                channels);
@@ -142,11 +140,36 @@ agx_cache_combine(agx_builder *b, agx_index dst,
  * To optimize vector extractions, we record the individual channels
  */
 static agx_instr *
-agx_emit_combine_to(agx_builder *b, agx_index dst,
-                    agx_index s0, agx_index s1, agx_index s2, agx_index s3)
+agx_emit_combine_to(agx_builder *b, agx_index dst, unsigned nr_srcs,
+                    agx_index *srcs)
 {
-   agx_cache_combine(b, dst, s0, s1, s2, s3);
-   return agx_p_combine_to(b, dst, s0, s1, s2, s3);
+   /* Cache exactly the channels supplied: callers such as agx_vec2 pass fewer
+    * than four sources, so a hard-coded count would read past srcs[]. */
+   agx_cache_combine(b, dst, nr_srcs, srcs);
+   agx_instr *I = agx_p_combine_to(b, dst, nr_srcs);
+
+   agx_foreach_src(I, s)
+      I->src[s] = srcs[s];
+
+   return I;
+}
+
+static agx_index
+agx_vec4(agx_builder *b, agx_index s0, agx_index s1, agx_index s2, agx_index s3)
+{
+   agx_index dst = agx_temp(b->shader, s0.size);
+   agx_index idx[4] = { s0, s1, s2, s3 };
+   agx_emit_combine_to(b, dst, 4, idx);
+   return dst;
+}
+
+static agx_index
+agx_vec2(agx_builder *b, agx_index s0, agx_index s1)
+{
+   agx_index dst = agx_temp(b->shader, s0.size);
+   agx_index idx[2] = { s0, s1 };
+   agx_emit_combine_to(b, dst, 2, idx);
+   return dst;
 }
 
 static void
@@ -197,7 +220,7 @@ agx_emit_cached_split(agx_builder *b, agx_index vec, unsigned n)
 {
    agx_index dests[4] = { agx_null(), agx_null(), agx_null(), agx_null() };
    agx_emit_split(b, dests, vec, n);
-   agx_cache_combine(b, vec, dests[0], dests[1], dests[2], dests[3]);
+   agx_cache_combine(b, vec, n, dests);
 }
 
 static void
@@ -654,7 +677,7 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr)
     * If only individual components are accessed, this combine will be dead code
     * eliminated.
     */
-   return agx_emit_combine_to(b, dst, dests[0], dests[1], dests[2], dests[3]);
+   return agx_emit_combine_to(b, dst, 4, dests);
 }
 
 static agx_index
@@ -926,7 +949,10 @@ agx_emit_alu(agx_builder *b, nir_alu_instr *instr)
    case nir_op_vec2:
    case nir_op_vec3:
    case nir_op_vec4:
-      return agx_emit_combine_to(b, dst, s0, s1, s2, s3);
+   {
+      agx_index idx[] = { s0, s1, s2, s3 };
+      return agx_emit_combine_to(b, dst, 4, idx);
+   }
 
    case nir_op_vec8:
    case nir_op_vec16:
@@ -1049,7 +1075,7 @@ agx_emit_tex(agx_builder *b, nir_tex_instr *instr)
             agx_mov_to(b, layer32, layer);
 
             channels[nr - 1] = layer32;
-            coords = agx_p_combine(b, channels[0], channels[1], channels[2], channels[3]);
+            coords = agx_vec4(b, channels[0], channels[1], channels[2], channels[3]);
          } else {
             coords = index;
          }
diff --git a/src/asahi/compiler/agx_opcodes.py b/src/asahi/compiler/agx_opcodes.py
index 7774866d687..019765ad6bb 100644
--- a/src/asahi/compiler/agx_opcodes.py
+++ b/src/asahi/compiler/agx_opcodes.py
@@ -25,11 +25,16 @@ opcodes = {}
 immediates = {}
 enums = {}
 
+VARIABLE = ~0
+
 class Opcode(object):
-   def __init__(self, name, dests, srcs, imms, is_float, can_eliminate, encoding_16, encoding_32):
+   def __init__(self, name, dests, srcs, imms, is_float, can_eliminate,
+                encoding_16, encoding_32):
       self.name = name
-      self.dests = dests
-      self.srcs = srcs
+      self.dests = dests if dests != VARIABLE else 0
+      self.srcs = srcs if srcs != VARIABLE else 0
+      self.variable_srcs = (srcs == VARIABLE)
+      self.variable_dests = (dests == VARIABLE)
       self.imms = imms
       self.is_float = is_float
       self.can_eliminate = can_eliminate
@@ -57,7 +62,8 @@ class Encoding(object):
       if self.extensible:
          assert(length_long == length_short + (4 if length_short > 8 else 2))
 
-def op(name, encoding_32, dests = 1, srcs = 0, imms = [], is_float = False, can_eliminate = True, encoding_16 = None):
+def op(name, encoding_32, dests = 1, srcs = 0, imms = [], is_float = False,
+       can_eliminate = True, encoding_16 = None):
    encoding_16 = Encoding(encoding_16) if encoding_16 is not None else None
    encoding_32 = Encoding(encoding_32) if encoding_32 is not None else None
 
@@ -258,7 +264,7 @@ op("or", _, srcs = 2)
 # Indicates the logical end of the block, before final branches/control flow
 op("p_logical_end", _, dests = 0, srcs = 0, can_eliminate = False)
 
-op("p_combine", _, srcs = 4)
+op("p_combine", _, srcs = VARIABLE)
 op("p_split", _, srcs = 1, dests = 4)
 
 # Phis are special-cased in the IR as they (uniquely) can take an unbounded
diff --git a/src/asahi/compiler/agx_register_allocate.c b/src/asahi/compiler/agx_register_allocate.c
index 0d4950f806e..1b9780a3f74 100644
--- a/src/asahi/compiler/agx_register_allocate.c
+++ b/src/asahi/compiler/agx_register_allocate.c
@@ -47,16 +47,7 @@ agx_write_registers(agx_instr *I, unsigned d)
    case AGX_OPCODE_LDCF:
       return 6;
    case AGX_OPCODE_P_COMBINE:
-   {
-      unsigned components = 0;
-
-      for (unsigned i = 0; i < 4; ++i) {
-         if (!agx_is_null(I->src[i]))
-            components = i + 1;
-      }
-
-      return components * size;
-   }
+      return I->nr_srcs * size;
    default:
       return size;
    }
@@ -325,11 +316,11 @@ agx_ra(agx_context *ctx)
          unsigned base = agx_index_to_reg(ssa_to_reg, ins->dest[0]);
          unsigned width = agx_size_align_16(ins->dest[0].size);
 
-         struct agx_copy copies[4];
+         struct agx_copy *copies = alloca(sizeof(copies[0]) * ins->nr_srcs);
          unsigned n = 0;
 
          /* Move the sources */
-         for (unsigned i = 0; i < 4; ++i) {
+         agx_foreach_src(ins, i) {
             if (agx_is_null(ins->src[i])) continue;
             assert(ins->src[i].size == ins->dest[0].size);
 
diff --git a/src/asahi/compiler/test/test-optimizer.cpp b/src/asahi/compiler/test/test-optimizer.cpp
index 1651142a123..ab810298b99 100644
--- a/src/asahi/compiler/test/test-optimizer.cpp
+++ b/src/asahi/compiler/test/test-optimizer.cpp
@@ -105,7 +105,13 @@ TEST_F(Optimizer, Copyprop)
 
 TEST_F(Optimizer, InlineHazards)
 {
-   NEGCASE(agx_p_combine_to(b, wx, agx_mov_imm(b, AGX_SIZE_32, 0), wy, wz, wz));
+   NEGCASE({
+         agx_instr *I = agx_p_combine_to(b, wx, 4);
+         I->src[0] = agx_mov_imm(b, AGX_SIZE_32, 0);
+         I->src[1] = wy;
+         I->src[2] = wz;
+         I->src[3] = wz;
+   });
 }
 
 TEST_F(Optimizer, CopypropRespectsAbsNeg)