aco: lower p_constaddr into separate instructions earlier

This allows them to be scheduled properly and simplifies the assembler a
little.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8994>
This commit is contained in:
Rhys Perry
2021-02-01 12:42:38 +00:00
parent ab957bb899
commit 0af7ff49fd
5 changed files with 48 additions and 45 deletions
+28 -42
View File
@@ -1,5 +1,6 @@
#include <vector>
#include <algorithm>
#include <map>
#include "aco_ir.h"
#include "aco_builder.h"
@@ -10,11 +11,16 @@
namespace aco {
/* Positions (indices into the assembler's output vector of dwords) recorded while
 * emitting the two halves of a lowered constant-address computation.
 * Entries live in asm_context::constaddrs, keyed by the constant operand read from
 * p_constaddr_getpc (operand 0) and p_constaddr_addlo (operand 1) -- presumably an
 * id shared by the pair; TODO confirm against the code that creates them. */
struct constaddr_info {
/* out.size() + 1 at the time p_constaddr_getpc is emitted; presumably the index just
 * past the s_getpc_b64 encoding -- TODO confirm it always encodes to one dword. */
unsigned getpc_end;
/* out.size() + 1 at the time p_constaddr_addlo is emitted; fix_constaddrs() later adds
 * a byte distance to out[add_literal]. */
unsigned add_literal;
};
struct asm_context {
Program *program;
enum chip_class chip_class;
std::vector<std::pair<int, SOPP_instruction*>> branches;
std::vector<unsigned> constaddrs;
std::map<unsigned, constaddr_info> constaddrs;
const int16_t* opcode;
// TODO: keep track of branch instructions referring blocks
// and, when emitting the block, correct the offset in instr
@@ -45,39 +51,17 @@ static uint32_t get_sdwa_sel(unsigned sel, PhysReg reg)
void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* instr)
{
/* lower remaining pseudo-instructions */
if (instr->opcode == aco_opcode::p_constaddr) {
unsigned dest = instr->definitions[0].physReg();
unsigned offset = instr->operands[0].constantValue();
if (instr->opcode == aco_opcode::p_constaddr_getpc) {
ctx.constaddrs[instr->operands[0].constantValue()].getpc_end = out.size() + 1;
/* s_getpc_b64 dest[0:1] */
uint32_t encoding = (0b101111101 << 23);
uint32_t opcode = ctx.opcode[(int)aco_opcode::s_getpc_b64];
if (opcode >= 55 && ctx.chip_class <= GFX9) {
assert(ctx.chip_class == GFX9 && opcode < 60);
opcode = opcode - 4;
}
encoding |= dest << 16;
encoding |= opcode << 8;
out.push_back(encoding);
instr->opcode = aco_opcode::s_getpc_b64;
instr->operands.pop_back();
} else if (instr->opcode == aco_opcode::p_constaddr_addlo) {
ctx.constaddrs[instr->operands[1].constantValue()].add_literal = out.size() + 1;
/* s_add_u32 dest[0], dest[0], ... */
encoding = (0b10 << 30);
encoding |= ctx.opcode[(int)aco_opcode::s_add_u32] << 23;
encoding |= dest << 16;
encoding |= dest;
encoding |= 255 << 8;
out.push_back(encoding);
ctx.constaddrs.push_back(out.size());
out.push_back(offset);
/* s_addc_u32 dest[1], dest[1], 0 */
encoding = (0b10 << 30);
encoding |= ctx.opcode[(int)aco_opcode::s_addc_u32] << 23;
encoding |= (dest + 1) << 16;
encoding |= dest + 1;
encoding |= 128 << 8;
out.push_back(encoding);
return;
instr->opcode = aco_opcode::s_add_u32;
instr->operands[1] = Operand(0u);
instr->operands[1].setFixed(PhysReg(255));
}
uint32_t opcode = ctx.opcode[(int)instr->opcode];
@@ -798,14 +782,14 @@ static void insert_code(asm_context& ctx, std::vector<uint32_t>& out, unsigned i
for (; branch_it != ctx.branches.end(); ++branch_it)
branch_it->first += insert_count;
/* Find first constant address after the inserted code */
auto caddr_it = std::find_if(ctx.constaddrs.begin(), ctx.constaddrs.end(), [insert_before](const int &caddr_pos) -> bool {
return (unsigned)caddr_pos >= insert_before;
});
/* Update the locations of constant addresses */
for (; caddr_it != ctx.constaddrs.end(); ++caddr_it)
(*caddr_it) += insert_count;
/* Update the locations of p_constaddr instructions */
for (auto& constaddr : ctx.constaddrs) {
constaddr_info& info = constaddr.second;
if (info.getpc_end >= insert_before)
info.getpc_end += insert_count;
if (info.add_literal >= insert_before)
info.add_literal += insert_count;
}
}
static void fix_branches_gfx10(asm_context& ctx, std::vector<uint32_t>& out)
@@ -928,8 +912,10 @@ void fix_branches(asm_context& ctx, std::vector<uint32_t>& out)
/* Adjusts the dwords in out that were recorded in ctx.constaddrs by adding a byte
 * distance measured against out.size() at the time of the call (each element of out
 * is a 32-bit dword, hence the factor of 4).
 * NOTE(review): the two loops below look like the pre-change (list of positions) and
 * post-change (map of constaddr_info) versions of the same fix-up shown together;
 * confirm that only one of them is meant to remain. */
void fix_constaddrs(asm_context& ctx, std::vector<uint32_t>& out)
{
/* Treats each entry as a plain position: adds (out.size() - addr + 1) * 4 to out[addr]. */
for (unsigned addr : ctx.constaddrs)
out[addr] += (out.size() - addr + 1u) * 4u;
/* Treats each entry as a (key, constaddr_info) pair. */
for (auto& constaddr : ctx.constaddrs) {
constaddr_info& info = constaddr.second;
/* (out.size() - getpc_end) is a count of dwords; * 4u converts it to bytes. */
out[info.add_literal] += (out.size() - info.getpc_end) * 4u;
}
}
unsigned emit_program(Program* program,