From 162269f04981da3905b22dfc8afacadc619c71c2 Mon Sep 17 00:00:00 2001 From: Mary Date: Thu, 5 Jan 2023 23:57:39 +0100 Subject: [PATCH] nouveau/mme: Add Fermi builder Co-Authored-By: Jason Ekstrand Part-of: --- src/nouveau/mme/meson.build | 1 + src/nouveau/mme/mme_builder.h | 52 +- src/nouveau/mme/mme_fermi_builder.c | 754 ++++++++++++++++++++++++++++ src/nouveau/mme/mme_fermi_builder.h | 115 +++++ 4 files changed, 919 insertions(+), 3 deletions(-) create mode 100644 src/nouveau/mme/mme_fermi_builder.c create mode 100644 src/nouveau/mme/mme_fermi_builder.h diff --git a/src/nouveau/mme/meson.build b/src/nouveau/mme/meson.build index 50f6e515a22..c7001c5b069 100644 --- a/src/nouveau/mme/meson.build +++ b/src/nouveau/mme/meson.build @@ -74,6 +74,7 @@ libnouveau_mme_files = files( 'mme_builder.h', 'mme_fermi.c', 'mme_fermi.h', + 'mme_fermi_builder.c', 'mme_fermi_dump.c', 'mme_tu104.c', 'mme_tu104.h', diff --git a/src/nouveau/mme/mme_builder.h b/src/nouveau/mme/mme_builder.h index 3f0dc64b54e..4da0c29e2c3 100644 --- a/src/nouveau/mme/mme_builder.h +++ b/src/nouveau/mme/mme_builder.h @@ -59,13 +59,18 @@ struct mme_cf { struct mme_builder; #include "mme_tu104_builder.h" +#include "mme_fermi_builder.h" +#define MME_CLS_FERMI 0x9000 #define MME_CLS_TURING 0xc500 struct mme_builder { uint16_t cls; struct mme_reg_alloc reg_alloc; - struct mme_tu104_builder tu104; + union { + struct mme_tu104_builder tu104; + struct mme_fermi_builder fermi; + }; }; static inline void @@ -76,6 +81,8 @@ mme_builder_init(struct mme_builder *b, struct nv_device_info *dev) if (b->cls >= MME_CLS_TURING) mme_tu104_builder_init(b); + else if (b->cls >= MME_CLS_FERMI) + mme_fermi_builder_init(b); else unreachable("Unsupported GPU class"); } @@ -85,6 +92,8 @@ mme_builder_finish(struct mme_builder *b, size_t *size_out) { if (b->cls >= MME_CLS_TURING) return mme_tu104_builder_finish(&b->tu104, size_out); + else if (b->cls >= MME_CLS_FERMI) + return mme_fermi_builder_finish(&b->fermi, size_out); else 
unreachable("Unsupported GPU class"); } @@ -110,6 +119,8 @@ mme_alu_to(struct mme_builder *b, { if (b->cls >= MME_CLS_TURING) mme_tu104_alu_to(b, dst, op, x, y); + else if (b->cls >= MME_CLS_FERMI) + mme_fermi_alu_to(b, dst, op, x, y); else unreachable("Unsupported GPU class"); } @@ -144,6 +155,8 @@ mme_alu64_to(struct mme_builder *b, { if (b->cls >= MME_CLS_TURING) mme_tu104_alu64_to(b, dst, op_lo, op_hi, x, y); + else if (b->cls >= MME_CLS_FERMI) + mme_fermi_alu64_to(b, dst, op_lo, op_hi, x, y); else unreachable("Unsupported GPU class"); } @@ -318,6 +331,8 @@ mme_merge_to(struct mme_builder *b, struct mme_value dst, { if (b->cls >= MME_CLS_TURING) mme_tu104_merge_to(b, dst, x, y, dst_pos, bits, src_pos); + else if (b->cls >= MME_CLS_FERMI) + mme_fermi_merge_to(b, dst, x, y, dst_pos, bits, src_pos); else unreachable("Unsupported GPU class"); } @@ -344,6 +359,8 @@ mme_state_arr_to(struct mme_builder *b, struct mme_value dst, { if (b->cls >= MME_CLS_TURING) mme_tu104_state_arr_to(b, dst, state, index); + else if (b->cls >= MME_CLS_FERMI) + mme_fermi_state_arr_to(b, dst, state, index); else unreachable("Unsupported GPU class"); } @@ -385,18 +402,31 @@ mme_load_to(struct mme_builder *b, struct mme_value dst) { if (b->cls >= MME_CLS_TURING) mme_tu104_load_to(b, dst); + else if (b->cls >= MME_CLS_FERMI) + mme_fermi_load_to(b, dst); else unreachable("Unsupported GPU class"); } static inline struct mme_value -mme_load(struct mme_builder *b) +mme_tu104_load(struct mme_builder *b) { struct mme_value dst = mme_alloc_reg(b); - mme_load_to(b, dst); + mme_tu104_load_to(b, dst); return dst; } +static inline struct mme_value +mme_load(struct mme_builder *b) +{ + if (b->cls >= MME_CLS_TURING) + return mme_tu104_load(b); + else if (b->cls >= MME_CLS_FERMI) + return mme_fermi_load(b); + else + unreachable("Unsupported GPU class"); +} + static inline struct mme_value64 mme_load_addr64(struct mme_builder *b) { @@ -411,6 +441,8 @@ mme_mthd_arr(struct mme_builder *b, uint16_t mthd, { if 
(b->cls >= MME_CLS_TURING) mme_tu104_mthd(b, mthd, index); + else if (b->cls >= MME_CLS_FERMI) + mme_fermi_mthd_arr(b, mthd, index); else unreachable("Unsupported GPU class"); } @@ -427,6 +459,8 @@ mme_emit(struct mme_builder *b, { if (b->cls >= MME_CLS_TURING) mme_tu104_emit(b, data); + else if (b->cls >= MME_CLS_FERMI) + mme_fermi_emit(b, data); else unreachable("Unsupported GPU class"); } @@ -457,6 +491,8 @@ mme_start_loop(struct mme_builder *b, struct mme_value count) { if (b->cls >= MME_CLS_TURING) mme_tu104_start_loop(b, count); + else if (b->cls >= MME_CLS_FERMI) + mme_fermi_start_loop(b, count); else unreachable("Unsupported GPU class"); } @@ -466,6 +502,8 @@ mme_end_loop(struct mme_builder *b) { if (b->cls >= MME_CLS_TURING) mme_tu104_end_loop(b); + else if (b->cls >= MME_CLS_FERMI) + mme_fermi_end_loop(b); else unreachable("Unsupported GPU class"); } @@ -481,6 +519,8 @@ mme_start_if_##op(struct mme_builder *b, \ { \ if (b->cls >= MME_CLS_TURING) \ mme_tu104_start_if(b, MME_CMP_OP_##OP, if_true, x, y); \ + else if (b->cls >= MME_CLS_FERMI) \ + mme_fermi_start_if(b, MME_CMP_OP_##OP, if_true, x, y); \ else \ unreachable("Unsupported GPU class"); \ } @@ -503,6 +543,8 @@ mme_end_if(struct mme_builder *b) { if (b->cls >= MME_CLS_TURING) mme_tu104_end_if(b); + else if (b->cls >= MME_CLS_FERMI) + mme_fermi_end_if(b); else unreachable("Unsupported GPU class"); } @@ -516,6 +558,8 @@ mme_start_while(struct mme_builder *b) { if (b->cls >= MME_CLS_TURING) mme_tu104_start_while(b); + else if (b->cls >= MME_CLS_FERMI) + mme_fermi_start_while(b); else unreachable("Unsupported GPU class"); } @@ -527,6 +571,8 @@ mme_end_while_##op(struct mme_builder *b, \ { \ if (b->cls >= MME_CLS_TURING) \ mme_tu104_end_while(b, MME_CMP_OP_##OP, if_true, x, y); \ + else if (b->cls >= MME_CLS_FERMI) \ + mme_fermi_end_while(b, MME_CMP_OP_##OP, if_true, x, y); \ else \ unreachable("Unsupported GPU class"); \ } diff --git a/src/nouveau/mme/mme_fermi_builder.c 
b/src/nouveau/mme/mme_fermi_builder.c new file mode 100644 index 00000000000..60d755f0277 --- /dev/null +++ b/src/nouveau/mme/mme_fermi_builder.c @@ -0,0 +1,754 @@ +#include "mme_builder.h" + +#include +#include + +// NOTE: We reserve R0 (zero register) and R1 (contains the first parameter at start) +#define MME_FERMI_RESERVED_INST_MASK 0xFFFFFF03 +#define MME_FERMI_IMM_ADD_MAX_BITS 17 +#define MME_FERMI_IMM_ADD_MAX_SIZE ((1 << (MME_FERMI_IMM_ADD_MAX_BITS + 1)) - 1) + +// NOTE: As the add immediate operation works with signed values, we drop the sign part. +#define MME_FERMI_IMM_LOAD_MAX_BITS (MME_FERMI_IMM_ADD_MAX_BITS - 1) +#define MME_FERMI_IMM_LOAD_MAX_SIZE ((1 << (MME_FERMI_IMM_LOAD_MAX_BITS + 1)) - 1) + +void +mme_fermi_builder_init(struct mme_builder *b) +{ + /* R0 is reserved for the zero register */ + mme_reg_alloc_init(&b->reg_alloc, 0xfe); + + /* Pre-allocate R1 for the first parameter value */ + ASSERTED struct mme_value r1 = mme_reg_alloc_alloc(&b->reg_alloc); + assert(r1.reg == 1); +} + +static inline bool +mme_fermi_is_zero_or_reg(struct mme_value x) +{ + switch (x.type) { + case MME_VALUE_TYPE_ZERO: return true; + case MME_VALUE_TYPE_IMM: return x.imm == 0; + case MME_VALUE_TYPE_REG: return true; + default: unreachable("Invalid MME value type"); + } +} + +static inline bool +mme_fermi_is_zero_or_imm(struct mme_value x) +{ + switch (x.type) { + case MME_VALUE_TYPE_ZERO: return true; + case MME_VALUE_TYPE_IMM: return true; + case MME_VALUE_TYPE_REG: return false; + default: unreachable("Invalid MME value type"); + } +} + +static inline enum mme_fermi_reg +mme_value_alu_reg(struct mme_value val) +{ + assert(mme_fermi_is_zero_or_reg(val)); + + switch (val.type) { + case MME_VALUE_TYPE_ZERO: + return MME_FERMI_REG_ZERO; + case MME_VALUE_TYPE_REG: + assert(val.reg > 0 && val.reg <= 7); + return MME_FERMI_REG_ZERO + val.reg; + case MME_VALUE_TYPE_IMM: + return MME_FERMI_REG_ZERO; + } + unreachable("Invalid value type"); +} + +static inline uint32_t
+mme_value_alu_imm(struct mme_value val) +{ + assert(mme_fermi_is_zero_or_imm(val)); + + switch (val.type) { + case MME_VALUE_TYPE_ZERO: + return 0; + case MME_VALUE_TYPE_IMM: + return val.imm; + case MME_VALUE_TYPE_REG: + return 0; + } + unreachable("Invalid value type"); +} + +static inline void +mme_free_reg_if_tmp(struct mme_builder *b, + struct mme_value data, + struct mme_value maybe_tmp) +{ + if (!mme_is_zero(data) && + !mme_is_zero(maybe_tmp) && + data.type != maybe_tmp.type) + mme_free_reg(b, maybe_tmp); +} + +static void +mme_fermi_new_inst(struct mme_fermi_builder *b) +{ + struct mme_fermi_inst noop = { MME_FERMI_INST_DEFAULTS }; + assert(b->inst_count < ARRAY_SIZE(b->insts)); + b->insts[b->inst_count] = noop; + b->inst_count++; + b->inst_parts = 0; +} + +static struct mme_fermi_inst * +mme_fermi_cur_inst(struct mme_fermi_builder *b) +{ + assert(b->inst_count > 0 && b->inst_count < ARRAY_SIZE(b->insts)); + return &b->insts[b->inst_count - 1]; +} + +void +mme_fermi_add_inst(struct mme_builder *b, + const struct mme_fermi_inst *inst) +{ + struct mme_fermi_builder *fb = &b->fermi; + + if (fb->inst_parts || fb->inst_count == 0) + mme_fermi_new_inst(fb); + + *mme_fermi_cur_inst(fb) = *inst; + mme_fermi_new_inst(fb); +} + +static inline void +mme_fermi_set_inst_parts(struct mme_fermi_builder *b, + enum mme_fermi_instr_parts parts) +{ + assert(!(b->inst_parts & parts)); + b->inst_parts |= parts; +} + +static inline bool +mme_fermi_next_inst_can_fit_a_full_inst(struct mme_fermi_builder *b) +{ + return !mme_fermi_is_empty(b) && b->inst_parts == 0; +} + +void +mme_fermi_mthd_arr(struct mme_builder *b, + uint16_t mthd, struct mme_value index) +{ + struct mme_fermi_builder *fb = &b->fermi; + struct mme_value src_reg = mme_zero(); + + if (!mme_fermi_next_inst_can_fit_a_full_inst(fb)) + mme_fermi_new_inst(fb); + + struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb); + + uint32_t mthd_imm = (1 << 12) | (mthd >> 2); + + if (index.type == MME_VALUE_TYPE_REG) { + src_reg 
= index; + } else if (index.type == MME_VALUE_TYPE_IMM) { + mthd_imm += index.imm; + } + + inst->op = MME_FERMI_OP_ADD_IMM; + inst->src[0] = mme_value_alu_reg(src_reg); + inst->imm = mthd_imm; + inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE_SET_MADDR; + inst->dst = MME_FERMI_REG_ZERO; + + mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP | + MME_FERMI_INSTR_PART_ASSIGN); +} + +static inline bool +mme_fermi_prev_inst_can_emit(struct mme_fermi_builder *b, struct mme_value data) { + if (mme_fermi_is_empty(b)) { + return false; + } + + if ((b->inst_parts & MME_FERMI_INSTR_PART_ASSIGN) == MME_FERMI_INSTR_PART_ASSIGN) { + struct mme_fermi_inst *inst = mme_fermi_cur_inst(b); + + if (inst->assign_op == MME_FERMI_ASSIGN_OP_MOVE && data.type == MME_VALUE_TYPE_REG && + mme_value_alu_reg(data) == inst->dst) { + return true; + } + } + + return false; +} + +static inline bool +mme_fermi_next_inst_can_emit(struct mme_fermi_builder *fb, + struct mme_value data) +{ + if (mme_fermi_is_empty(fb)) + return false; + + if (fb->inst_parts == 0) + return true; + + return mme_fermi_prev_inst_can_emit(fb, data); +} + +static inline struct mme_value +mme_fermi_reg(uint32_t reg) +{ + struct mme_value val = { + .type = MME_VALUE_TYPE_REG, + .reg = reg, + }; + return val; +} + +static inline void +mme_fermi_add_imm17(struct mme_fermi_builder *fb, + struct mme_value dst_reg, + struct mme_value src_reg, + uint32_t val) +{ + assert(dst_reg.type == MME_VALUE_TYPE_REG && + mme_fermi_is_zero_or_reg(src_reg) && + val <= MME_FERMI_IMM_ADD_MAX_SIZE); + + if (!mme_fermi_next_inst_can_fit_a_full_inst(fb)) { + mme_fermi_new_inst(fb); + } + + struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb); + + inst->op = MME_FERMI_OP_ADD_IMM; + inst->src[0] = mme_value_alu_reg(src_reg); + inst->imm = val; + inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE; + inst->dst = mme_value_alu_reg(dst_reg); + + mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP | + MME_FERMI_INSTR_PART_ASSIGN); +} + +static bool 
+mme_fermi_bfe_lsl_can_use_imm(struct mme_fermi_builder *b, + struct mme_value src_bits, + struct mme_value dst_bits) +{ + return (mme_fermi_is_zero_or_reg(src_bits) && + mme_fermi_is_zero_or_imm(dst_bits) && + mme_value_alu_imm(dst_bits) <= 31); +} + +static bool +mme_fermi_bfe_lsl_can_use_reg(struct mme_fermi_builder *b, + struct mme_value src_bits, + struct mme_value dst_bits) +{ + return (mme_fermi_is_zero_or_imm(src_bits) && + mme_fermi_is_zero_or_reg(dst_bits) && + mme_value_alu_imm(src_bits) <= 31); +} + +static void +mme_fermi_bfe(struct mme_fermi_builder *fb, + struct mme_value dst_reg, + struct mme_value src_bits, + struct mme_value src_reg, + struct mme_value dst_bits, + struct mme_value size) +{ + assert(dst_reg.type == MME_VALUE_TYPE_REG && + mme_fermi_is_zero_or_reg(src_reg) && + (mme_fermi_bfe_lsl_can_use_imm(fb, src_bits, dst_bits) || + mme_fermi_bfe_lsl_can_use_reg(fb, src_bits, dst_bits))); + + if (!mme_fermi_next_inst_can_fit_a_full_inst(fb)) + mme_fermi_new_inst(fb); + + struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb); + + if (mme_fermi_bfe_lsl_can_use_imm(fb, src_bits, dst_bits)) { + inst->op = MME_FERMI_OP_BFE_LSL_IMM; + inst->src[0] = mme_value_alu_reg(src_bits); + inst->src[1] = mme_value_alu_reg(src_reg); + inst->bitfield.dst_bit = mme_value_alu_imm(dst_bits); + inst->bitfield.size = mme_value_alu_imm(size); + } else if (mme_fermi_bfe_lsl_can_use_reg(fb, src_bits, dst_bits)) { + inst->op = MME_FERMI_OP_BFE_LSL_REG; + inst->src[0] = mme_value_alu_reg(dst_bits); + inst->src[1] = mme_value_alu_reg(src_reg); + inst->bitfield.src_bit = mme_value_alu_imm(src_bits); + inst->bitfield.size = mme_value_alu_imm(size); + } + + inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE; + inst->dst = mme_value_alu_reg(dst_reg); + + mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP | + MME_FERMI_INSTR_PART_ASSIGN); +} + +static void +mme_fermi_sll_to(struct mme_fermi_builder *b, + struct mme_value dst, + struct mme_value x, + struct mme_value y) +{ + 
assert(mme_fermi_is_zero_or_reg(dst)); + + mme_fermi_bfe(b, dst, mme_zero(), x, y, mme_imm(31)); +} + +static void +mme_fermi_srl_to(struct mme_fermi_builder *b, + struct mme_value dst, + struct mme_value x, + struct mme_value y) +{ + assert(mme_fermi_is_zero_or_reg(dst)); + + mme_fermi_bfe(b, dst, y, x, mme_zero(), mme_imm(31)); +} + +static struct mme_value +mme_fermi_load_imm_to_reg(struct mme_builder *b, struct mme_value data) +{ + struct mme_fermi_builder *fb = &b->fermi; + + assert(data.type == MME_VALUE_TYPE_IMM || + data.type == MME_VALUE_TYPE_ZERO); + + /* If the immediate is zero, we can simplify this */ + if (mme_is_zero(data)) { + return mme_zero(); + } else { + uint32_t imm = data.imm; + + struct mme_value dst = mme_alloc_reg(b); + + if (imm > MME_FERMI_IMM_LOAD_MAX_SIZE) { + /* TODO: a possible optimisation involve searching for the first bit + * offset and see if it can fit in 16 bits. + */ + uint32_t high_bits = (imm >> (MME_FERMI_IMM_LOAD_MAX_BITS + 1)) & MME_FERMI_IMM_LOAD_MAX_SIZE; + uint32_t low_bits = imm & MME_FERMI_IMM_LOAD_MAX_SIZE; + + mme_fermi_add_imm17(fb, dst, mme_zero(), high_bits); + mme_fermi_sll_to(fb, dst, dst, + mme_imm(MME_FERMI_IMM_LOAD_MAX_BITS + 1)); + mme_fermi_add_imm17(fb, dst, dst, low_bits); + } else { + mme_fermi_add_imm17(fb, dst, mme_zero(), imm); + } + + return dst; + } +} + +static inline struct mme_value +mme_fermi_value_as_reg(struct mme_builder *b, + struct mme_value data) +{ + if (data.type == MME_VALUE_TYPE_REG || mme_is_zero(data)) { + return data; + } + + return mme_fermi_load_imm_to_reg(b, data); +} + +void mme_fermi_emit(struct mme_builder *b, + struct mme_value data) +{ + struct mme_fermi_builder *fb = &b->fermi; + struct mme_fermi_inst *inst; + + /* Check if previous assign was to the same dst register and modify assign + * mode if needed + */ + if (mme_fermi_prev_inst_can_emit(fb, data)) { + inst = mme_fermi_cur_inst(fb); + inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE_EMIT; + } else { + struct mme_value 
data_reg = mme_fermi_value_as_reg(b, data); + + /* Because of mme_fermi_value_as_reg, it is possible that a new load + * can be simplified. + */ + if (mme_fermi_prev_inst_can_emit(fb, data_reg)) { + inst = mme_fermi_cur_inst(fb); + inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE_EMIT; + } else { + if (!mme_fermi_next_inst_can_emit(fb, data)) + mme_fermi_new_inst(fb); + + inst = mme_fermi_cur_inst(fb); + inst->op = MME_FERMI_OP_ALU_REG; + inst->alu_op = MME_FERMI_ALU_OP_ADD; + inst->src[0] = mme_value_alu_reg(data_reg); + inst->src[1] = MME_FERMI_REG_ZERO; + inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE_EMIT; + inst->dst = MME_FERMI_REG_ZERO; + + mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP | + MME_FERMI_INSTR_PART_ASSIGN); + } + + mme_free_reg_if_tmp(b, data, data_reg); + } +} + +static void +mme_fermi_branch(struct mme_fermi_builder *fb, + enum mme_fermi_reg src, int32_t offset, bool if_zero) +{ + if (fb->inst_parts || mme_fermi_is_empty(fb)) + mme_fermi_new_inst(fb); + + struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb); + + inst->op = MME_FERMI_OP_BRANCH; + inst->src[0] = src; + inst->imm = offset; + inst->branch.no_delay = true; + inst->branch.not_zero = if_zero; + + mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP | + MME_FERMI_INSTR_PART_ASSIGN); +} + +static void +mme_fermi_start_cf(struct mme_builder *b, + enum mme_cf_type type, + struct mme_value cond, + bool is_zero) +{ + struct mme_fermi_builder *fb = &b->fermi; + + /* The condition here is inverted because we want to branch and skip the + * block when the condition fails.
+ */ + assert(mme_fermi_is_zero_or_reg(cond)); + mme_fermi_branch(fb, mme_value_alu_reg(cond), 0, is_zero); + + uint16_t ip = fb->inst_count - 1; + assert(fb->insts[ip].op == MME_FERMI_OP_BRANCH); + + assert(fb->cf_depth < ARRAY_SIZE(fb->cf_stack)); + fb->cf_stack[fb->cf_depth++] = (struct mme_cf) { + .type = type, + .start_ip = ip, + }; + + /* The inside of control-flow needs to start with a new instruction */ + mme_fermi_new_inst(fb); +} + +static struct mme_cf +mme_fermi_end_cf(struct mme_builder *b, enum mme_cf_type type) +{ + struct mme_fermi_builder *fb = &b->fermi; + + if (fb->inst_parts) + mme_fermi_new_inst(fb); + + assert(fb->cf_depth > 0); + struct mme_cf cf = fb->cf_stack[--fb->cf_depth]; + assert(cf.type == type); + + assert(fb->insts[cf.start_ip].op == MME_FERMI_OP_BRANCH); + fb->insts[cf.start_ip].imm = fb->inst_count - cf.start_ip - 1; + + return cf; +} + +void +mme_fermi_start_if(struct mme_builder *b, + enum mme_cmp_op op, + bool if_true, + struct mme_value x, + struct mme_value y) +{ + assert(op == MME_CMP_OP_EQ); + + if (mme_is_zero(x)) { + mme_fermi_start_cf(b, MME_CF_TYPE_IF, y, if_true); + } else if (mme_is_zero(y)) { + mme_fermi_start_cf(b, MME_CF_TYPE_IF, x, if_true); + } else { + struct mme_value tmp = mme_xor(b, x, y); + mme_fermi_start_cf(b, MME_CF_TYPE_IF, tmp, if_true); + mme_free_reg(b, tmp); + } +} + +void +mme_fermi_end_if(struct mme_builder *b) +{ + mme_fermi_end_cf(b, MME_CF_TYPE_IF); +} + +void +mme_fermi_start_while(struct mme_builder *b) +{ + mme_fermi_start_cf(b, MME_CF_TYPE_WHILE, mme_zero(), false); +} + +static void +mme_fermi_end_while_zero(struct mme_builder *b, + struct mme_value cond, + bool is_zero) +{ + struct mme_fermi_builder *fb = &b->fermi; + + struct mme_cf cf = mme_fermi_end_cf(b, MME_CF_TYPE_WHILE); + + int delta = fb->inst_count - cf.start_ip - 2; + mme_fermi_branch(fb, mme_value_alu_reg(cond), -delta, !is_zero); +} + +void +mme_fermi_end_while(struct mme_builder *b, + enum mme_cmp_op op, + bool if_true, + 
struct mme_value x, + struct mme_value y) +{ + assert(op == MME_CMP_OP_EQ); + + if (mme_is_zero(x)) { + mme_fermi_end_while_zero(b, y, if_true); + } else if (mme_is_zero(y)) { + mme_fermi_end_while_zero(b, x, if_true); + } else { + struct mme_value tmp = mme_xor(b, x, y); + mme_fermi_end_while_zero(b, tmp, if_true); + mme_free_reg(b, tmp); + } +} + +void +mme_fermi_start_loop(struct mme_builder *b, + struct mme_value count) +{ + struct mme_fermi_builder *fb = &b->fermi; + + assert(mme_is_zero(fb->loop_counter)); + fb->loop_counter = mme_mov(b, count); + + mme_start_while(b); +} + +void +mme_fermi_end_loop(struct mme_builder *b) +{ + struct mme_fermi_builder *fb = &b->fermi; + + mme_sub_to(b, fb->loop_counter, fb->loop_counter, mme_imm(1)); + mme_fermi_end_while_zero(b, fb->loop_counter, false); + + mme_free_reg(b, fb->loop_counter); + fb->loop_counter = mme_zero(); +} + +static inline bool +mme_fermi_next_inst_can_load_to(struct mme_fermi_builder *b) +{ + return !mme_fermi_is_empty(b) && !(b->inst_parts & MME_FERMI_INSTR_PART_ASSIGN); +} + +void mme_fermi_load_to(struct mme_builder *b, + struct mme_value dst) +{ + struct mme_fermi_builder *fb = &b->fermi; + + assert(dst.type == MME_VALUE_TYPE_REG || + dst.type == MME_VALUE_TYPE_ZERO); + + if (!fb->first_loaded) { + struct mme_value r1 = { + .type = MME_VALUE_TYPE_REG, + .reg = 1, + }; + mme_mov_to(b, dst, r1); + mme_free_reg(b, r1); + fb->first_loaded = true; + return; + } + + if (!mme_fermi_next_inst_can_load_to(fb)) + mme_fermi_new_inst(fb); + + struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb); + + inst->assign_op = MME_FERMI_ASSIGN_OP_LOAD; + inst->dst = mme_value_alu_reg(dst); + + mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_ASSIGN); +} + + +struct mme_value +mme_fermi_load(struct mme_builder *b) +{ + struct mme_fermi_builder *fb = &b->fermi; + + if (!fb->first_loaded) { + struct mme_value r1 = { + .type = MME_VALUE_TYPE_REG, + .reg = 1, + }; + fb->first_loaded = true; + return r1; + } + + struct 
mme_value dst = mme_alloc_reg(b); + mme_fermi_load_to(b, dst); + + return dst; +} + +static enum mme_tu104_alu_op +mme_to_fermi_alu_op(enum mme_alu_op op) +{ + switch (op) { +#define ALU_CASE(op) case MME_ALU_OP_##op: return MME_FERMI_ALU_OP_##op; + ALU_CASE(ADD) + ALU_CASE(ADDC) + ALU_CASE(SUB) + ALU_CASE(SUBB) + ALU_CASE(AND) + ALU_CASE(NAND) + ALU_CASE(OR) + ALU_CASE(XOR) +#undef ALU_CASE + default: + unreachable("Unsupported MME ALU op"); + } +} + +void +mme_fermi_alu_to(struct mme_builder *b, + struct mme_value dst, + enum mme_alu_op op, + struct mme_value x, + struct mme_value y) +{ + struct mme_fermi_builder *fb = &b->fermi; + + switch (op) { + case MME_ALU_OP_SLL: + mme_fermi_sll_to(fb, dst, x, y); + return; + case MME_ALU_OP_SRL: + mme_fermi_srl_to(fb, dst, x, y); + return; + default: + break; + } + + assert(mme_fermi_is_zero_or_reg(dst)); + + struct mme_value x_reg = mme_fermi_value_as_reg(b, x); + struct mme_value y_reg = mme_fermi_value_as_reg(b, y); + + if (!mme_fermi_next_inst_can_fit_a_full_inst(fb)) + mme_fermi_new_inst(fb); + + struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb); + inst->op = MME_FERMI_OP_ALU_REG; + inst->alu_op = mme_to_fermi_alu_op(op); + inst->src[0] = mme_value_alu_reg(x_reg); + inst->src[1] = mme_value_alu_reg(y_reg); + inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE; + inst->dst = mme_value_alu_reg(dst); + + mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP | + MME_FERMI_INSTR_PART_ASSIGN); + + mme_free_reg_if_tmp(b, x, x_reg); + mme_free_reg_if_tmp(b, y, y_reg); +} + + +void mme_fermi_state_arr_to(struct mme_builder *b, + struct mme_value dst, + uint16_t state, + struct mme_value index) +{ + struct mme_fermi_builder *fb = &b->fermi; + + assert(mme_fermi_is_zero_or_reg(dst)); + assert(state % 4 == 0); + + struct mme_value index_reg = mme_fermi_value_as_reg(b, index); + + if (!mme_fermi_next_inst_can_fit_a_full_inst(fb)) + mme_fermi_new_inst(fb); + + struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb); + inst->op = 
MME_FERMI_OP_STATE; + inst->src[0] = mme_value_alu_reg(index_reg); + inst->src[1] = MME_FERMI_REG_ZERO; + inst->imm = state >> 2; + inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE; + inst->dst = mme_value_alu_reg(dst); + + mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP | + MME_FERMI_INSTR_PART_ASSIGN); + + mme_free_reg_if_tmp(b, index, index_reg); +} + +void +mme_fermi_merge_to(struct mme_builder *b, struct mme_value dst, + struct mme_value x, struct mme_value y, + uint16_t dst_pos, uint16_t bits, uint16_t src_pos) +{ + struct mme_fermi_builder *fb = &b->fermi; + + assert(mme_fermi_is_zero_or_reg(dst)); + assert(dst_pos < 32); + assert(bits < 32); + assert(src_pos < 32); + + if (!mme_fermi_next_inst_can_fit_a_full_inst(fb)) + mme_fermi_new_inst(fb); + + struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb); + + inst->op = MME_FERMI_OP_MERGE; + inst->src[0] = mme_value_alu_reg(x); + inst->src[1] = mme_value_alu_reg(y); + inst->bitfield.dst_bit = dst_pos; + inst->bitfield.src_bit = src_pos; + inst->bitfield.size = bits; + + inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE; + inst->dst = mme_value_alu_reg(dst); + mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP | + MME_FERMI_INSTR_PART_ASSIGN); +} + +uint32_t * +mme_fermi_builder_finish(struct mme_fermi_builder *b, size_t *size_out) +{ + assert(b->cf_depth == 0); + + /* TODO: If there are at least two instructions and we can guarantee the + * last two instructions get executed (not in control-flow), we don't need + * to add a pair of NOPs.
+ */ + mme_fermi_new_inst(b); + mme_fermi_new_inst(b); + + b->insts[b->inst_count - 2].end_next = true; + + size_t enc_size = b->inst_count * sizeof(uint32_t); + uint32_t *enc = malloc(enc_size); + if (enc != NULL) { + mme_fermi_encode(enc, b->inst_count, b->insts); + *size_out = enc_size; + } + return enc; +} diff --git a/src/nouveau/mme/mme_fermi_builder.h b/src/nouveau/mme/mme_fermi_builder.h new file mode 100644 index 00000000000..ea0deaaf453 --- /dev/null +++ b/src/nouveau/mme/mme_fermi_builder.h @@ -0,0 +1,115 @@ +#ifndef MME_BUILDER_H +#error "This file must only be included by mme_builder.h" +#endif + +#include "mme_fermi.h" +#include "mme_value.h" + +#include "util/bitscan.h" +#include "util/enum_operators.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define MME_FERMI_BUILDER_MAX_INSTS 128 + +enum mme_fermi_instr_parts { + MME_FERMI_INSTR_PART_OP = BITFIELD_BIT(0), + MME_FERMI_INSTR_PART_ASSIGN = BITFIELD_BIT(1) +}; + +struct mme_fermi_builder { + bool first_loaded; + uint32_t inst_count; + enum mme_fermi_instr_parts inst_parts; + struct mme_fermi_inst insts[MME_FERMI_BUILDER_MAX_INSTS]; + uint32_t cf_depth; + struct mme_value loop_counter; + struct mme_cf cf_stack[8]; +}; + +void mme_fermi_builder_init(struct mme_builder *b); + +uint32_t * mme_fermi_builder_finish(struct mme_fermi_builder *b, size_t *size_out); + +void mme_fermi_add_inst(struct mme_builder *b, + const struct mme_fermi_inst *inst); + +static inline bool +mme_fermi_is_empty(struct mme_fermi_builder *b) +{ + return b->inst_count == 0; +} + +#define mme_fermi_asm(b, __inst) \ + for (struct mme_fermi_inst __inst = { MME_FERMI_INST_DEFAULTS }; \ + !__inst.end_next; \ + mme_fermi_add_inst((b), &__inst), __inst.end_next = true) + +void mme_fermi_mthd_arr(struct mme_builder *b, + uint16_t mthd, + struct mme_value index); + +void mme_fermi_emit(struct mme_builder *b, + struct mme_value data); + +void mme_fermi_start_loop(struct mme_builder *b, + struct mme_value count); +void 
mme_fermi_end_loop(struct mme_builder *b); + +void mme_fermi_start_if(struct mme_builder *b, + enum mme_cmp_op op, + bool if_true, + struct mme_value x, + struct mme_value y); +void mme_fermi_end_if(struct mme_builder *b); + +void mme_fermi_start_while(struct mme_builder *b); +void mme_fermi_end_while(struct mme_builder *b, + enum mme_cmp_op op, + bool if_true, + struct mme_value x, + struct mme_value y); + +void mme_fermi_load_to(struct mme_builder *b, + struct mme_value dst); + +struct mme_value mme_fermi_load(struct mme_builder *b); + +void +mme_fermi_alu_to(struct mme_builder *b, + struct mme_value dst, + enum mme_alu_op op, + struct mme_value x, + struct mme_value y); + +static inline void +mme_fermi_alu64_to(struct mme_builder *b, + struct mme_value64 dst, + enum mme_alu_op op_lo, + enum mme_alu_op op_hi, + struct mme_value64 x, + struct mme_value64 y) +{ + assert(dst.lo.type == MME_VALUE_TYPE_REG); + assert(dst.hi.type == MME_VALUE_TYPE_REG); + + mme_fermi_alu_to(b, dst.lo, op_lo, x.lo, y.lo); + mme_fermi_alu_to(b, dst.hi, op_hi, x.hi, y.hi); +} + +void +mme_fermi_merge_to(struct mme_builder *b, struct mme_value dst, + struct mme_value x, struct mme_value y, + uint16_t dst_pos, uint16_t bits, uint16_t src_pos); + +void mme_fermi_state_arr_to(struct mme_builder *b, + struct mme_value dst, + uint16_t state, + struct mme_value index); + +#ifdef __cplusplus +} +#endif +