Summary of this patch (free text ahead of the first file header is ignored by patch tools):

  * wait_imm gains three GFX12+ counters, "sample", "bvh" and "km". Both
    constructors initialise them to unset_counter, and the enum values and
    static_asserts keep each field's byte offset equal to its wait_type index.
  * wait_imm::max() reports 63 / 7 / 31 for the new counters on GFX12 and
    newer, and 0 on older levels.
  * wait_imm::unpack() handles the single-counter s_wait_* opcodes by taking
    the minimum of the stored counter and the immediate. For
    s_wait_loadcnt_dscnt and s_wait_storecnt_dscnt it reads the first counter
    from bits 13:8 and dscnt from bits 5:0; a field value of 0x3f is treated
    as unset_counter.
  * The IR printer names vm/lgkm/vs as loadcnt/dscnt/storecnt on GFX12 and
    newer, prints the two combined opcodes through the unpacked counters, and
    prints the single-counter s_wait_* opcodes as " imm:<value>".

diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp
index 3a0db771a05..efe4611c9d5 100644
--- a/src/amd/compiler/aco_ir.cpp
+++ b/src/amd/compiler/aco_ir.cpp
@@ -1196,10 +1196,13 @@ can_swap_operands(aco_ptr<Instruction>& instr, aco_opcode* new_op, unsigned idx0
    }
 }
 
-wait_imm::wait_imm() : exp(unset_counter), lgkm(unset_counter), vm(unset_counter), vs(unset_counter)
+wait_imm::wait_imm()
+   : exp(unset_counter), lgkm(unset_counter), vm(unset_counter), vs(unset_counter),
+     sample(unset_counter), bvh(unset_counter), km(unset_counter)
 {}
 wait_imm::wait_imm(uint16_t vm_, uint16_t exp_, uint16_t lgkm_, uint16_t vs_)
-   : exp(exp_), lgkm(lgkm_), vm(vm_), vs(vs_)
+   : exp(exp_), lgkm(lgkm_), vm(vm_), vs(vs_), sample(unset_counter), bvh(unset_counter),
+     km(unset_counter)
 {}
 
 uint16_t
@@ -1241,6 +1244,9 @@ wait_imm::max(enum amd_gfx_level gfx_level)
    imm.exp = 7;
    imm.lgkm = gfx_level >= GFX10 ? 63 : 15;
    imm.vs = gfx_level >= GFX10 ? 63 : 0;
+   imm.sample = gfx_level >= GFX12 ? 63 : 0;
+   imm.bvh = gfx_level >= GFX12 ? 7 : 0;
+   imm.km = gfx_level >= GFX12 ? 31 : 0;
    return imm;
 }
 
@@ -1253,7 +1259,31 @@ wait_imm::unpack(enum amd_gfx_level gfx_level, const Instruction* instr)
    aco_opcode op = instr->opcode;
    uint16_t packed = instr->salu().imm;
 
-   if (op == aco_opcode::s_waitcnt_expcnt) {
+   if (op == aco_opcode::s_wait_loadcnt) {
+      vm = std::min<uint8_t>(vm, packed);
+   } else if (op == aco_opcode::s_wait_storecnt) {
+      vs = std::min<uint8_t>(vs, packed);
+   } else if (op == aco_opcode::s_wait_samplecnt) {
+      sample = std::min<uint8_t>(sample, packed);
+   } else if (op == aco_opcode::s_wait_bvhcnt) {
+      bvh = std::min<uint8_t>(bvh, packed);
+   } else if (op == aco_opcode::s_wait_expcnt) {
+      exp = std::min<uint8_t>(exp, packed);
+   } else if (op == aco_opcode::s_wait_dscnt) {
+      lgkm = std::min<uint8_t>(lgkm, packed);
+   } else if (op == aco_opcode::s_wait_kmcnt) {
+      km = std::min<uint8_t>(km, packed);
+   } else if (op == aco_opcode::s_wait_loadcnt_dscnt) {
+      uint32_t vm2 = (packed >> 8) & 0x3f;
+      uint32_t ds = packed & 0x3f;
+      vm = std::min<uint8_t>(vm, vm2 == 0x3f ? wait_imm::unset_counter : vm2);
+      lgkm = std::min<uint8_t>(lgkm, ds == 0x3f ? wait_imm::unset_counter : ds);
+   } else if (op == aco_opcode::s_wait_storecnt_dscnt) {
+      uint32_t vs2 = (packed >> 8) & 0x3f;
+      uint32_t ds = packed & 0x3f;
+      vs = std::min<uint8_t>(vs, vs2 == 0x3f ? wait_imm::unset_counter : vs2);
+      lgkm = std::min<uint8_t>(lgkm, ds == 0x3f ? wait_imm::unset_counter : ds);
+   } else if (op == aco_opcode::s_waitcnt_expcnt) {
       exp = std::min<uint8_t>(exp, packed);
    } else if (op == aco_opcode::s_waitcnt_lgkmcnt) {
       lgkm = std::min<uint8_t>(lgkm, packed);
@@ -1325,6 +1355,9 @@ wait_imm::print(FILE* output) const
    names[wait_type_vm] = "vm";
    names[wait_type_lgkm] = "lgkm";
    names[wait_type_vs] = "vs";
+   names[wait_type_sample] = "sample";
+   names[wait_type_bvh] = "bvh";
+   names[wait_type_km] = "km";
    for (unsigned i = 0; i < wait_type_num; i++) {
       if ((*this)[i] != unset_counter)
          fprintf(output, "%s: %u\n", names[i], (*this)[i]);
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 171d0ec7c48..97049361859 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -179,7 +179,11 @@ enum wait_type {
    wait_type_vm = 2,
    /* GFX10+ */
    wait_type_vs = 3,
-   wait_type_num = 4,
+   /* GFX12+ */
+   wait_type_sample = 4,
+   wait_type_bvh = 5,
+   wait_type_km = 6,
+   wait_type_num = 7,
 };
 
 struct Instruction;
@@ -191,6 +195,9 @@ struct wait_imm {
    uint8_t lgkm;
    uint8_t vm;
    uint8_t vs;
+   uint8_t sample;
+   uint8_t bvh;
+   uint8_t km;
 
    wait_imm();
    wait_imm(uint16_t vm_, uint16_t exp_, uint16_t lgkm_, uint16_t vs_);
@@ -223,6 +230,9 @@ static_assert(offsetof(wait_imm, exp) == wait_type_exp);
 static_assert(offsetof(wait_imm, lgkm) == wait_type_lgkm);
 static_assert(offsetof(wait_imm, vm) == wait_type_vm);
 static_assert(offsetof(wait_imm, vs) == wait_type_vs);
+static_assert(offsetof(wait_imm, sample) == wait_type_sample);
+static_assert(offsetof(wait_imm, bvh) == wait_type_bvh);
+static_assert(offsetof(wait_imm, km) == wait_type_km);
 
 /* s_wait_event immediate bits. */
 enum wait_event_imm : uint16_t {
diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp
index 0009f01640c..ee893295247 100644
--- a/src/amd/compiler/aco_print_ir.cpp
+++ b/src/amd/compiler/aco_print_ir.cpp
@@ -274,20 +274,35 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
    case Format::SOPP: {
       uint16_t imm = instr->salu().imm;
       switch (instr->opcode) {
-      case aco_opcode::s_waitcnt: {
+      case aco_opcode::s_waitcnt:
+      case aco_opcode::s_wait_loadcnt_dscnt:
+      case aco_opcode::s_wait_storecnt_dscnt: {
          wait_imm unpacked;
          unpacked.unpack(gfx_level, instr);
          const char* names[wait_type_num];
          names[wait_type_exp] = "expcnt";
-         names[wait_type_vm] = "vmcnt";
-         names[wait_type_lgkm] = "lgkmcnt";
-         names[wait_type_vs] = "vscnt";
+         names[wait_type_vm] = gfx_level >= GFX12 ? "loadcnt" : "vmcnt";
+         names[wait_type_lgkm] = gfx_level >= GFX12 ? "dscnt" : "lgkmcnt";
+         names[wait_type_vs] = gfx_level >= GFX12 ? "storecnt" : "vscnt";
+         names[wait_type_sample] = "samplecnt";
+         names[wait_type_bvh] = "bvhcnt";
+         names[wait_type_km] = "kmcnt";
          for (unsigned i = 0; i < wait_type_num; i++) {
             if (unpacked[i] != wait_imm::unset_counter)
                fprintf(output, " %s(%d)", names[i], unpacked[i]);
          }
          break;
       }
+      case aco_opcode::s_wait_expcnt:
+      case aco_opcode::s_wait_dscnt:
+      case aco_opcode::s_wait_loadcnt:
+      case aco_opcode::s_wait_storecnt:
+      case aco_opcode::s_wait_samplecnt:
+      case aco_opcode::s_wait_bvhcnt:
+      case aco_opcode::s_wait_kmcnt: {
+         fprintf(output, " imm:%u", imm);
+         break;
+      }
       case aco_opcode::s_waitcnt_depctr: {
          unsigned va_vdst = (imm >> 12) & 0xf;
          unsigned va_sdst = (imm >> 9) & 0x7;