nouveau/mme: Add Fermi builder

Co-Authored-By: Jason Ekstrand <jason.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24326>
This commit is contained in:
Mary
2023-01-05 23:57:39 +01:00
committed by Marge Bot
parent 663258be5e
commit 162269f049
4 changed files with 919 additions and 3 deletions
+1
View File
@@ -74,6 +74,7 @@ libnouveau_mme_files = files(
'mme_builder.h',
'mme_fermi.c',
'mme_fermi.h',
'mme_fermi_builder.c',
'mme_fermi_dump.c',
'mme_tu104.c',
'mme_tu104.h',
+49 -3
View File
@@ -59,13 +59,18 @@ struct mme_cf {
struct mme_builder;
#include "mme_tu104_builder.h"
#include "mme_fermi_builder.h"
#define MME_CLS_FERMI 0x9000
#define MME_CLS_TURING 0xc500
struct mme_builder {
uint16_t cls;
struct mme_reg_alloc reg_alloc;
struct mme_tu104_builder tu104;
union {
struct mme_tu104_builder tu104;
struct mme_fermi_builder fermi;
};
};
static inline void
@@ -76,6 +81,8 @@ mme_builder_init(struct mme_builder *b, struct nv_device_info *dev)
if (b->cls >= MME_CLS_TURING)
mme_tu104_builder_init(b);
else if (b->cls >= MME_CLS_FERMI)
mme_fermi_builder_init(b);
else
unreachable("Unsupported GPU class");
}
@@ -85,6 +92,8 @@ mme_builder_finish(struct mme_builder *b, size_t *size_out)
{
if (b->cls >= MME_CLS_TURING)
return mme_tu104_builder_finish(&b->tu104, size_out);
else if (b->cls >= MME_CLS_FERMI)
return mme_fermi_builder_finish(&b->fermi, size_out);
else
unreachable("Unsupported GPU class");
}
@@ -110,6 +119,8 @@ mme_alu_to(struct mme_builder *b,
{
if (b->cls >= MME_CLS_TURING)
mme_tu104_alu_to(b, dst, op, x, y);
else if (b->cls >= MME_CLS_FERMI)
mme_fermi_alu_to(b, dst, op, x, y);
else
unreachable("Unsupported GPU class");
}
@@ -144,6 +155,8 @@ mme_alu64_to(struct mme_builder *b,
{
if (b->cls >= MME_CLS_TURING)
mme_tu104_alu64_to(b, dst, op_lo, op_hi, x, y);
else if (b->cls >= MME_CLS_FERMI)
mme_fermi_alu64_to(b, dst, op_lo, op_hi, x, y);
else
unreachable("Unsupported GPU class");
}
@@ -318,6 +331,8 @@ mme_merge_to(struct mme_builder *b, struct mme_value dst,
{
if (b->cls >= MME_CLS_TURING)
mme_tu104_merge_to(b, dst, x, y, dst_pos, bits, src_pos);
else if (b->cls >= MME_CLS_FERMI)
mme_fermi_merge_to(b, dst, x, y, dst_pos, bits, src_pos);
else
unreachable("Unsupported GPU class");
}
@@ -344,6 +359,8 @@ mme_state_arr_to(struct mme_builder *b, struct mme_value dst,
{
if (b->cls >= MME_CLS_TURING)
mme_tu104_state_arr_to(b, dst, state, index);
else if (b->cls >= MME_CLS_FERMI)
mme_fermi_state_arr_to(b, dst, state, index);
else
unreachable("Unsupported GPU class");
}
@@ -385,18 +402,31 @@ mme_load_to(struct mme_builder *b, struct mme_value dst)
{
if (b->cls >= MME_CLS_TURING)
mme_tu104_load_to(b, dst);
else if (b->cls >= MME_CLS_FERMI)
mme_fermi_load_to(b, dst);
else
unreachable("Unsupported GPU class");
}
static inline struct mme_value
mme_load(struct mme_builder *b)
mme_tu104_load(struct mme_builder *b)
{
struct mme_value dst = mme_alloc_reg(b);
mme_load_to(b, dst);
mme_tu104_load_to(b, dst);
return dst;
}
/* Class-dispatched load: returns the next macro parameter in a fresh
 * register, routed to whichever backend matches the bound GPU class.
 */
static inline struct mme_value
mme_load(struct mme_builder *b)
{
   if (b->cls >= MME_CLS_TURING)
      return mme_tu104_load(b);
   else if (b->cls >= MME_CLS_FERMI)
      return mme_fermi_load(b);
   else
      unreachable("Unsupported GPU class");
}
static inline struct mme_value64
mme_load_addr64(struct mme_builder *b)
{
@@ -411,6 +441,8 @@ mme_mthd_arr(struct mme_builder *b, uint16_t mthd,
{
if (b->cls >= MME_CLS_TURING)
mme_tu104_mthd(b, mthd, index);
else if (b->cls >= MME_CLS_FERMI)
mme_fermi_mthd_arr(b, mthd, index);
else
unreachable("Unsupported GPU class");
}
@@ -427,6 +459,8 @@ mme_emit(struct mme_builder *b,
{
if (b->cls >= MME_CLS_TURING)
mme_tu104_emit(b, data);
else if (b->cls >= MME_CLS_FERMI)
mme_fermi_emit(b, data);
else
unreachable("Unsupported GPU class");
}
@@ -457,6 +491,8 @@ mme_start_loop(struct mme_builder *b, struct mme_value count)
{
if (b->cls >= MME_CLS_TURING)
mme_tu104_start_loop(b, count);
else if (b->cls >= MME_CLS_FERMI)
mme_fermi_start_loop(b, count);
else
unreachable("Unsupported GPU class");
}
@@ -466,6 +502,8 @@ mme_end_loop(struct mme_builder *b)
{
if (b->cls >= MME_CLS_TURING)
mme_tu104_end_loop(b);
else if (b->cls >= MME_CLS_FERMI)
mme_fermi_end_loop(b);
else
unreachable("Unsupported GPU class");
}
@@ -481,6 +519,8 @@ mme_start_if_##op(struct mme_builder *b, \
{ \
if (b->cls >= MME_CLS_TURING) \
mme_tu104_start_if(b, MME_CMP_OP_##OP, if_true, x, y); \
else if (b->cls >= MME_CLS_FERMI) \
mme_fermi_start_if(b, MME_CMP_OP_##OP, if_true, x, y); \
else \
unreachable("Unsupported GPU class"); \
}
@@ -503,6 +543,8 @@ mme_end_if(struct mme_builder *b)
{
if (b->cls >= MME_CLS_TURING)
mme_tu104_end_if(b);
else if (b->cls >= MME_CLS_FERMI)
mme_fermi_end_if(b);
else
unreachable("Unsupported GPU class");
}
@@ -516,6 +558,8 @@ mme_start_while(struct mme_builder *b)
{
if (b->cls >= MME_CLS_TURING)
mme_tu104_start_while(b);
else if (b->cls >= MME_CLS_FERMI)
mme_fermi_start_while(b);
else
unreachable("Unsupported GPU class");
}
@@ -527,6 +571,8 @@ mme_end_while_##op(struct mme_builder *b, \
{ \
if (b->cls >= MME_CLS_TURING) \
mme_tu104_end_while(b, MME_CMP_OP_##OP, if_true, x, y); \
else if (b->cls >= MME_CLS_FERMI) \
mme_fermi_end_while(b, MME_CMP_OP_##OP, if_true, x, y); \
else \
unreachable("Unsupported GPU class"); \
}
+754
View File
@@ -0,0 +1,754 @@
#include "mme_builder.h"
#include <stdio.h>
#include <stdlib.h>
// NOTE: We reserve R0 (zero register) and R1 (contains the first parameter at start)
#define MME_FERMI_RESERVED_INST_MASK 0xFFFFFF03
#define MME_FERMI_IMM_ADD_MAX_BITS 17
#define MME_FERMI_IMM_ADD_MAX_SIZE ((1 << (MME_FERMI_IMM_ADD_MAX_BITS + 1)) - 1)
// NOTE: As the add immediate operation works with signed values, we drop the sign bit.
#define MME_FERMI_IMM_LOAD_MAX_BITS (MME_FERMI_IMM_ADD_MAX_BITS - 1)
#define MME_FERMI_IMM_LOAD_MAX_SIZE ((1 << (MME_FERMI_IMM_LOAD_MAX_BITS + 1)) - 1)
/* Initialize the Fermi MME builder state embedded in @b.
 *
 * The allocator mask 0xfe excludes bit 0: R0 is the hardware zero
 * register.  R1 holds the macro's first parameter on entry, so it is
 * immediately claimed from the allocator (and handed back by the first
 * mme_fermi_load/_to call).
 */
void
mme_fermi_builder_init(struct mme_builder *b)
{
   /* R0 is reserved for the zero register */
   mme_reg_alloc_init(&b->reg_alloc, 0xfe);
   /* Pre-allocate R1 for the first parameter value */
   ASSERTED struct mme_value r1 = mme_reg_alloc_alloc(&b->reg_alloc);
   assert(r1.reg == 1);
}
/* True if @x can be encoded as an ALU register source: the zero value,
 * a real register, or an immediate that happens to be 0 (which folds to
 * the hardware zero register).
 */
static inline bool
mme_fermi_is_zero_or_reg(struct mme_value x)
{
   if (x.type == MME_VALUE_TYPE_ZERO || x.type == MME_VALUE_TYPE_REG)
      return true;
   if (x.type == MME_VALUE_TYPE_IMM)
      return x.imm == 0;
   unreachable("Invalid MME value type");
}
/* True if @x can be encoded in an immediate field (zero or a literal
 * immediate); registers cannot.
 */
static inline bool
mme_fermi_is_zero_or_imm(struct mme_value x)
{
   if (x.type == MME_VALUE_TYPE_REG)
      return false;
   if (x.type == MME_VALUE_TYPE_ZERO || x.type == MME_VALUE_TYPE_IMM)
      return true;
   unreachable("Invalid MME value type");
}
/* Encode @val as a Fermi ALU register operand.
 *
 * ZERO (and the immediate-0 case allowed by the assert) map onto the
 * hardware zero register; real registers must be R1..R7.
 */
static inline enum mme_fermi_reg
mme_value_alu_reg(struct mme_value val)
{
   assert(mme_fermi_is_zero_or_reg(val));
   if (val.type == MME_VALUE_TYPE_REG) {
      assert(val.reg > 0 && val.reg <= 7);
      return MME_FERMI_REG_ZERO + val.reg;
   }
   if (val.type == MME_VALUE_TYPE_ZERO || val.type == MME_VALUE_TYPE_IMM)
      return MME_FERMI_REG_ZERO;
   unreachable("Invalid value type");
}
/* Encode @val as an immediate operand.  Only a real immediate carries a
 * payload; ZERO (and the REG case, which the assert rules out) encode
 * as 0.
 */
static inline uint32_t
mme_value_alu_imm(struct mme_value val)
{
   assert(mme_fermi_is_zero_or_imm(val));
   return val.type == MME_VALUE_TYPE_IMM ? val.imm : 0;
}
/* Free @maybe_tmp if it is a temporary materialized from @data.
 *
 * When mme_fermi_value_as_reg() had to allocate a register for an
 * immediate, @data and @maybe_tmp differ in type (IMM vs REG) and the
 * register must be returned to the allocator.  If the types match,
 * @maybe_tmp is just @data passed through and is owned by the caller.
 */
static inline void
mme_free_reg_if_tmp(struct mme_builder *b,
                    struct mme_value data,
                    struct mme_value maybe_tmp)
{
   if (!mme_is_zero(data) &&
       !mme_is_zero(maybe_tmp) &&
       data.type != maybe_tmp.type)
      mme_free_reg(b, maybe_tmp);
}
/* Append a fresh no-op instruction slot and mark all of its parts
 * (op and assignment) as still available for packing.
 */
static void
mme_fermi_new_inst(struct mme_fermi_builder *b)
{
   struct mme_fermi_inst noop = { MME_FERMI_INST_DEFAULTS };
   assert(b->inst_count < ARRAY_SIZE(b->insts));
   b->insts[b->inst_count] = noop;
   b->inst_count++;
   b->inst_parts = 0;
}
/* Return the instruction currently being packed (the last one pushed by
 * mme_fermi_new_inst()).
 */
static struct mme_fermi_inst *
mme_fermi_cur_inst(struct mme_fermi_builder *b)
{
   assert(b->inst_count > 0 && b->inst_count < ARRAY_SIZE(b->insts));
   return &b->insts[b->inst_count - 1];
}
/* Append a fully pre-built instruction (used by the mme_fermi_asm()
 * macro).  The instruction is copied verbatim into its own slot; a fresh
 * slot is then opened so later builder calls never pack into it.
 */
void
mme_fermi_add_inst(struct mme_builder *b,
                   const struct mme_fermi_inst *inst)
{
   struct mme_fermi_builder *fb = &b->fermi;
   if (fb->inst_parts || fb->inst_count == 0)
      mme_fermi_new_inst(fb);
   *mme_fermi_cur_inst(fb) = *inst;
   mme_fermi_new_inst(fb);
}
/* Mark parts of the current instruction as consumed.  Asserts against
 * double-packing the same part.
 */
static inline void
mme_fermi_set_inst_parts(struct mme_fermi_builder *b,
                         enum mme_fermi_instr_parts parts)
{
   assert(!(b->inst_parts & parts));
   b->inst_parts |= parts;
}
/* The current slot can take a full op+assign only if it exists and
 * nothing has been packed into it yet.
 */
static inline bool
mme_fermi_next_inst_can_fit_a_full_inst(struct mme_fermi_builder *b)
{
   if (mme_fermi_is_empty(b))
      return false;
   return b->inst_parts == 0;
}
/* Set the current method address (SET_MADDR) for subsequent emits.
 *
 * @mthd is a byte offset; it is encoded in dwords (>> 2).  Bit 12 is
 * also set in the immediate — presumably the per-emit auto-increment
 * step; TODO confirm against the Fermi MME ISA docs.
 */
void
mme_fermi_mthd_arr(struct mme_builder *b,
                   uint16_t mthd, struct mme_value index)
{
   struct mme_fermi_builder *fb = &b->fermi;
   struct mme_value src_reg = mme_zero();
   if (!mme_fermi_next_inst_can_fit_a_full_inst(fb))
      mme_fermi_new_inst(fb);
   struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb);
   uint32_t mthd_imm = (1 << 12) | (mthd >> 2);
   /* A register index becomes the ALU source; an immediate index is
    * folded straight into the method immediate.
    */
   if (index.type == MME_VALUE_TYPE_REG) {
      src_reg = index;
   } else if (index.type == MME_VALUE_TYPE_IMM) {
      mthd_imm += index.imm;
   }
   inst->op = MME_FERMI_OP_ADD_IMM;
   inst->src[0] = mme_value_alu_reg(src_reg);
   inst->imm = mthd_imm;
   inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE_SET_MADDR;
   inst->dst = MME_FERMI_REG_ZERO;
   mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP |
                                MME_FERMI_INSTR_PART_ASSIGN);
}
/* An emit of @data can be folded into the previous instruction when that
 * instruction's pending assignment is a plain MOVE into the very
 * register being emitted (the MOVE is then upgraded to MOVE_EMIT).
 */
static inline bool
mme_fermi_prev_inst_can_emit(struct mme_fermi_builder *b, struct mme_value data) {
   if (mme_fermi_is_empty(b))
      return false;
   if (!(b->inst_parts & MME_FERMI_INSTR_PART_ASSIGN))
      return false;
   struct mme_fermi_inst *inst = mme_fermi_cur_inst(b);
   return inst->assign_op == MME_FERMI_ASSIGN_OP_MOVE &&
          data.type == MME_VALUE_TYPE_REG &&
          mme_value_alu_reg(data) == inst->dst;
}
/* An emit can go into the current instruction if either that slot is
 * completely untouched, or its pending assignment already targets @data
 * and can be upgraded to an emit.
 */
static inline bool
mme_fermi_next_inst_can_emit(struct mme_fermi_builder *fb,
                             struct mme_value data)
{
   if (mme_fermi_is_empty(fb))
      return false;
   return fb->inst_parts == 0 || mme_fermi_prev_inst_can_emit(fb, data);
}
static inline struct mme_value
mme_fermi_reg(uint32_t reg)
{
struct mme_value val = {
.type = MME_VALUE_TYPE_REG,
.reg = reg,
};
return val;
}
/* dst_reg = src_reg + val via ADD_IMM's immediate field.
 *
 * @val must fit in MME_FERMI_IMM_ADD_MAX_SIZE; wider constants are split
 * by mme_fermi_load_imm_to_reg().
 */
static inline void
mme_fermi_add_imm17(struct mme_fermi_builder *fb,
                    struct mme_value dst_reg,
                    struct mme_value src_reg,
                    uint32_t val)
{
   assert(dst_reg.type == MME_VALUE_TYPE_REG &&
          mme_fermi_is_zero_or_reg(src_reg) &&
          val <= MME_FERMI_IMM_ADD_MAX_SIZE);
   if (!mme_fermi_next_inst_can_fit_a_full_inst(fb)) {
      mme_fermi_new_inst(fb);
   }
   struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb);
   inst->op = MME_FERMI_OP_ADD_IMM;
   inst->src[0] = mme_value_alu_reg(src_reg);
   inst->imm = val;
   inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE;
   inst->dst = mme_value_alu_reg(dst_reg);
   mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP |
                                MME_FERMI_INSTR_PART_ASSIGN);
}
/* BFE_LSL_IMM form: source bit position comes from a register while the
 * destination bit position is a 5-bit immediate (<= 31).
 */
static bool
mme_fermi_bfe_lsl_can_use_imm(struct mme_fermi_builder *b,
                              struct mme_value src_bits,
                              struct mme_value dst_bits)
{
   if (!mme_fermi_is_zero_or_reg(src_bits))
      return false;
   return mme_fermi_is_zero_or_imm(dst_bits) &&
          mme_value_alu_imm(dst_bits) <= 31;
}
/* BFE_LSL_REG form: the mirror of the IMM form — destination bit
 * position in a register, source bit position as a 5-bit immediate.
 */
static bool
mme_fermi_bfe_lsl_can_use_reg(struct mme_fermi_builder *b,
                              struct mme_value src_bits,
                              struct mme_value dst_bits)
{
   if (!mme_fermi_is_zero_or_reg(dst_bits))
      return false;
   return mme_fermi_is_zero_or_imm(src_bits) &&
          mme_value_alu_imm(src_bits) <= 31;
}
/* Emit a bitfield-extract-and-shift: take @size bits of @src_reg
 * starting at @src_bits and place them at bit @dst_bits of @dst_reg.
 *
 * The hardware has two encodings — one with the destination position as
 * an immediate, one with the source position as an immediate.  Exactly
 * one of the two must be usable (asserted above).
 */
static void
mme_fermi_bfe(struct mme_fermi_builder *fb,
              struct mme_value dst_reg,
              struct mme_value src_bits,
              struct mme_value src_reg,
              struct mme_value dst_bits,
              struct mme_value size)
{
   assert(dst_reg.type == MME_VALUE_TYPE_REG &&
          mme_fermi_is_zero_or_reg(src_reg) &&
          (mme_fermi_bfe_lsl_can_use_imm(fb, src_bits, dst_bits) ||
           mme_fermi_bfe_lsl_can_use_reg(fb, src_bits, dst_bits)));
   if (!mme_fermi_next_inst_can_fit_a_full_inst(fb))
      mme_fermi_new_inst(fb);
   struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb);
   if (mme_fermi_bfe_lsl_can_use_imm(fb, src_bits, dst_bits)) {
      /* Source position in a register, destination position immediate. */
      inst->op = MME_FERMI_OP_BFE_LSL_IMM;
      inst->src[0] = mme_value_alu_reg(src_bits);
      inst->src[1] = mme_value_alu_reg(src_reg);
      inst->bitfield.dst_bit = mme_value_alu_imm(dst_bits);
      inst->bitfield.size = mme_value_alu_imm(size);
   } else if (mme_fermi_bfe_lsl_can_use_reg(fb, src_bits, dst_bits)) {
      /* Destination position in a register, source position immediate. */
      inst->op = MME_FERMI_OP_BFE_LSL_REG;
      inst->src[0] = mme_value_alu_reg(dst_bits);
      inst->src[1] = mme_value_alu_reg(src_reg);
      inst->bitfield.src_bit = mme_value_alu_imm(src_bits);
      inst->bitfield.size = mme_value_alu_imm(size);
   }
   inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE;
   inst->dst = mme_value_alu_reg(dst_reg);
   mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP |
                                MME_FERMI_INSTR_PART_ASSIGN);
}
/* dst = x << y, lowered to a BFE: extract (up to 31 bits) from bit 0 of
 * x and place the result at bit position y.
 */
static void
mme_fermi_sll_to(struct mme_fermi_builder *b,
                 struct mme_value dst,
                 struct mme_value x,
                 struct mme_value y)
{
   assert(mme_fermi_is_zero_or_reg(dst));
   mme_fermi_bfe(b, dst, mme_zero(), x, y, mme_imm(31));
}
/* dst = x >> y, lowered to a BFE: extract (up to 31 bits) starting at
 * bit y of x and place the result at bit position 0.
 */
static void
mme_fermi_srl_to(struct mme_fermi_builder *b,
                 struct mme_value dst,
                 struct mme_value x,
                 struct mme_value y)
{
   assert(mme_fermi_is_zero_or_reg(dst));
   mme_fermi_bfe(b, dst, y, x, mme_zero(), mme_imm(31));
}
/* Materialize an immediate into a freshly allocated register.
 *
 * Zero needs no register at all.  Immediates wider than the ADD_IMM
 * payload are built in two steps: load the high bits, shift them up,
 * then add the low bits.
 */
static struct mme_value
mme_fermi_load_imm_to_reg(struct mme_builder *b, struct mme_value data)
{
   struct mme_fermi_builder *fb = &b->fermi;
   assert(data.type == MME_VALUE_TYPE_IMM ||
          data.type == MME_VALUE_TYPE_ZERO);
   /* If the immediate is zero, we can simplify this */
   if (mme_is_zero(data)) {
      return mme_zero();
   } else {
      uint32_t imm = data.imm;
      struct mme_value dst = mme_alloc_reg(b);
      if (imm > MME_FERMI_IMM_LOAD_MAX_SIZE) {
         /* TODO: a possible optimization involves searching for the first
          * set bit and seeing if the value then fits in 16 bits.
          */
         uint32_t high_bits = (imm >> (MME_FERMI_IMM_LOAD_MAX_BITS + 1)) & MME_FERMI_IMM_LOAD_MAX_SIZE;
         uint32_t low_bits = imm & MME_FERMI_IMM_LOAD_MAX_SIZE;
         mme_fermi_add_imm17(fb, dst, mme_zero(), high_bits);
         mme_fermi_sll_to(fb, dst, dst,
                          mme_imm(MME_FERMI_IMM_LOAD_MAX_BITS + 1));
         mme_fermi_add_imm17(fb, dst, dst, low_bits);
      } else {
         mme_fermi_add_imm17(fb, dst, mme_zero(), imm);
      }
      return dst;
   }
}
/* Return @data as something directly encodable as a register operand.
 * Registers and zero pass through; an immediate is materialized into a
 * fresh register (caller pairs with mme_free_reg_if_tmp()).
 */
static inline struct mme_value
mme_fermi_value_as_reg(struct mme_builder *b,
                       struct mme_value data)
{
   if (data.type != MME_VALUE_TYPE_REG && !mme_is_zero(data))
      return mme_fermi_load_imm_to_reg(b, data);
   return data;
}
/* Emit @data to the current method address (set by mme_fermi_mthd_arr).
 *
 * Tries to piggy-back on the previous instruction: if it already MOVEs
 * into the register being emitted, that assignment is upgraded to
 * MOVE_EMIT instead of spending a new instruction.
 */
void mme_fermi_emit(struct mme_builder *b,
                    struct mme_value data)
{
   struct mme_fermi_builder *fb = &b->fermi;
   struct mme_fermi_inst *inst;
   /* Check if previous assign was to the same dst register and modify assign
    * mode if needed
    */
   if (mme_fermi_prev_inst_can_emit(fb, data)) {
      inst = mme_fermi_cur_inst(fb);
      inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE_EMIT;
   } else {
      struct mme_value data_reg = mme_fermi_value_as_reg(b, data);
      /* mme_fermi_value_as_reg may have just generated a load whose
       * assignment can itself be upgraded to an emit.
       */
      if (mme_fermi_prev_inst_can_emit(fb, data_reg)) {
         inst = mme_fermi_cur_inst(fb);
         inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE_EMIT;
      } else {
         if (!mme_fermi_next_inst_can_emit(fb, data))
            mme_fermi_new_inst(fb);
         inst = mme_fermi_cur_inst(fb);
         /* ADD with the zero register is a plain move of data_reg. */
         inst->op = MME_FERMI_OP_ALU_REG;
         inst->alu_op = MME_FERMI_ALU_OP_ADD;
         inst->src[0] = mme_value_alu_reg(data_reg);
         inst->src[1] = MME_FERMI_REG_ZERO;
         inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE_EMIT;
         inst->dst = MME_FERMI_REG_ZERO;
         mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP |
                                      MME_FERMI_INSTR_PART_ASSIGN);
      }
      mme_free_reg_if_tmp(b, data, data_reg);
   }
}
/* Emit a conditional relative branch on @src.
 *
 * NOTE(review): despite its name, @if_zero is stored directly into
 * branch.not_zero, so callers effectively pass the already-inverted
 * *skip* condition (see mme_fermi_start_cf) — confirm the naming against
 * the Fermi MME ISA before reusing this helper elsewhere.
 */
static void
mme_fermi_branch(struct mme_fermi_builder *fb,
                 enum mme_fermi_reg src, int32_t offset, bool if_zero)
{
   if (fb->inst_parts || mme_fermi_is_empty(fb))
      mme_fermi_new_inst(fb);
   struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb);
   inst->op = MME_FERMI_OP_BRANCH;
   inst->src[0] = src;
   inst->imm = offset;
   inst->branch.no_delay = true;
   inst->branch.not_zero = if_zero;
   mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP |
                                MME_FERMI_INSTR_PART_ASSIGN);
}
/* Open a control-flow block: emit a placeholder forward branch over the
 * body and push it on the CF stack so mme_fermi_end_cf() can patch the
 * branch offset once the body length is known.
 */
static void
mme_fermi_start_cf(struct mme_builder *b,
                   enum mme_cf_type type,
                   struct mme_value cond,
                   bool is_zero)
{
   struct mme_fermi_builder *fb = &b->fermi;
   /* The condition here is inverted because we want to branch and skip the
    * block when the condition fails.
    */
   assert(mme_fermi_is_zero_or_reg(cond));
   mme_fermi_branch(fb, mme_value_alu_reg(cond), 0, is_zero);
   uint16_t ip = fb->inst_count - 1;
   assert(fb->insts[ip].op == MME_FERMI_OP_BRANCH);
   assert(fb->cf_depth < ARRAY_SIZE(fb->cf_stack));
   fb->cf_stack[fb->cf_depth++] = (struct mme_cf) {
      .type = type,
      .start_ip = ip,
   };
   /* The inside of control-flow needs to start with a new instruction */
   mme_fermi_new_inst(fb);
}
/* Close the innermost control-flow block of @type and patch the forward
 * branch emitted by mme_fermi_start_cf() to jump past the body.  Returns
 * the popped CF entry for callers (e.g. while loops) that need the
 * start IP.
 */
static struct mme_cf
mme_fermi_end_cf(struct mme_builder *b, enum mme_cf_type type)
{
   struct mme_fermi_builder *fb = &b->fermi;
   if (fb->inst_parts)
      mme_fermi_new_inst(fb);
   assert(fb->cf_depth > 0);
   struct mme_cf cf = fb->cf_stack[--fb->cf_depth];
   assert(cf.type == type);
   assert(fb->insts[cf.start_ip].op == MME_FERMI_OP_BRANCH);
   fb->insts[cf.start_ip].imm = fb->inst_count - cf.start_ip - 1;
   return cf;
}
void
mme_fermi_start_if(struct mme_builder *b,
enum mme_cmp_op op,
bool if_true,
struct mme_value x,
struct mme_value y)
{
assert(op == MME_CMP_OP_EQ);
if (mme_is_zero(x)) {
mme_fermi_start_cf(b, MME_CF_TYPE_IF, y, if_true);
} else if (mme_is_zero(y)) {
mme_fermi_start_cf(b, MME_CF_TYPE_IF, x, if_true);
} else {
struct mme_value tmp = mme_xor(b, x, y);
mme_fermi_start_cf(b, MME_CF_TYPE_IF, tmp, if_true);
mme_free_reg(b, tmp);
}
}
/* Close the innermost if block opened by mme_fermi_start_if(). */
void
mme_fermi_end_if(struct mme_builder *b)
{
   mme_fermi_end_cf(b, MME_CF_TYPE_IF);
}
/* Open a while block.  The condition (zero register, is_zero = false)
 * makes the header branch unconditional; the real loop test is emitted
 * at the bottom by mme_fermi_end_while().
 */
void
mme_fermi_start_while(struct mme_builder *b)
{
   mme_fermi_start_cf(b, MME_CF_TYPE_WHILE, mme_zero(), false);
}
/* Close a while block: pop the CF entry (patching the header branch) and
 * emit a backward branch on @cond to the top of the body.  The condition
 * is inverted (!is_zero) relative to the forward skip-branch sense used
 * by mme_fermi_start_cf().
 */
static void
mme_fermi_end_while_zero(struct mme_builder *b,
                         struct mme_value cond,
                         bool is_zero)
{
   struct mme_fermi_builder *fb = &b->fermi;
   struct mme_cf cf = mme_fermi_end_cf(b, MME_CF_TYPE_WHILE);
   /* Backward distance from the branch to the body top; the -2 accounts
    * for the header branch and the branch's own slot — TODO confirm
    * against the encoder's relative-offset convention.
    */
   int delta = fb->inst_count - cf.start_ip - 2;
   mme_fermi_branch(fb, mme_value_alu_reg(cond), -delta, !is_zero);
}
void
mme_fermi_end_while(struct mme_builder *b,
enum mme_cmp_op op,
bool if_true,
struct mme_value x,
struct mme_value y)
{
assert(op == MME_CMP_OP_EQ);
if (mme_is_zero(x)) {
mme_fermi_end_while_zero(b, y, if_true);
} else if (mme_is_zero(y)) {
mme_fermi_end_while_zero(b, x, if_true);
} else {
struct mme_value tmp = mme_xor(b, x, y);
mme_fermi_end_while_zero(b, tmp, if_true);
mme_free_reg(b, tmp);
}
}
/* Begin a counted loop: snapshot @count into a private counter register
 * and open a while block.  Loops cannot nest — the assert requires the
 * counter to be free on entry.
 */
void
mme_fermi_start_loop(struct mme_builder *b,
                     struct mme_value count)
{
   struct mme_fermi_builder *fb = &b->fermi;
   assert(mme_is_zero(fb->loop_counter));
   fb->loop_counter = mme_mov(b, count);
   mme_start_while(b);
}
/* Close a counted loop: decrement the counter, branch back while it is
 * non-zero, then release the counter register.
 */
void
mme_fermi_end_loop(struct mme_builder *b)
{
   struct mme_fermi_builder *fb = &b->fermi;
   mme_sub_to(b, fb->loop_counter, fb->loop_counter, mme_imm(1));
   mme_fermi_end_while_zero(b, fb->loop_counter, false);
   mme_free_reg(b, fb->loop_counter);
   fb->loop_counter = mme_zero();
}
/* A LOAD only occupies the assignment slot, so any existing instruction
 * whose assignment part is still free can host it.
 */
static inline bool
mme_fermi_next_inst_can_load_to(struct mme_fermi_builder *b)
{
   if (mme_fermi_is_empty(b))
      return false;
   return (b->inst_parts & MME_FERMI_INSTR_PART_ASSIGN) == 0;
}
/* Load the next macro parameter into @dst.
 *
 * The first parameter is special: the hardware hands it to the macro in
 * R1, so the first load is just a move out of R1 (which is then returned
 * to the allocator).  Subsequent parameters come from the LOAD
 * assignment source.
 */
void mme_fermi_load_to(struct mme_builder *b,
                       struct mme_value dst)
{
   struct mme_fermi_builder *fb = &b->fermi;
   assert(dst.type == MME_VALUE_TYPE_REG ||
          dst.type == MME_VALUE_TYPE_ZERO);
   if (!fb->first_loaded) {
      struct mme_value r1 = {
         .type = MME_VALUE_TYPE_REG,
         .reg = 1,
      };
      mme_mov_to(b, dst, r1);
      mme_free_reg(b, r1);
      fb->first_loaded = true;
      return;
   }
   if (!mme_fermi_next_inst_can_load_to(fb))
      mme_fermi_new_inst(fb);
   struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb);
   inst->assign_op = MME_FERMI_ASSIGN_OP_LOAD;
   inst->dst = mme_value_alu_reg(dst);
   mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_ASSIGN);
}
/* Load the next macro parameter into a fresh register.
 *
 * For the first parameter, R1 already holds the value (it was
 * pre-allocated in mme_fermi_builder_init()), so it is returned directly
 * without emitting any instruction.
 */
struct mme_value
mme_fermi_load(struct mme_builder *b)
{
   struct mme_fermi_builder *fb = &b->fermi;
   if (!fb->first_loaded) {
      struct mme_value r1 = {
         .type = MME_VALUE_TYPE_REG,
         .reg = 1,
      };
      fb->first_loaded = true;
      return r1;
   }
   struct mme_value dst = mme_alloc_reg(b);
   mme_fermi_load_to(b, dst);
   return dst;
}
/* Translate a generic MME ALU opcode to its Fermi encoding.
 *
 * Only opcodes with a direct ALU_REG equivalent are handled; shifts are
 * lowered separately via BFE (see mme_fermi_alu_to) and anything else is
 * rejected.
 *
 * Fix: the return type was declared as the Turing enum
 * (enum mme_tu104_alu_op) even though the function returns
 * MME_FERMI_ALU_OP_* values that are stored into the Fermi
 * inst->alu_op field — it must be enum mme_fermi_alu_op.
 */
static enum mme_fermi_alu_op
mme_to_fermi_alu_op(enum mme_alu_op op)
{
   switch (op) {
#define ALU_CASE(op) case MME_ALU_OP_##op: return MME_FERMI_ALU_OP_##op;
   ALU_CASE(ADD)
   ALU_CASE(ADDC)
   ALU_CASE(SUB)
   ALU_CASE(SUBB)
   ALU_CASE(AND)
   ALU_CASE(NAND)
   ALU_CASE(OR)
   ALU_CASE(XOR)
#undef ALU_CASE
   default:
      unreachable("Unsupported MME ALU op");
   }
}
/* Lower a generic ALU op to Fermi.
 *
 * Shifts have no ALU_REG encoding and are lowered via BFE; everything
 * else maps 1:1 onto an ALU_REG instruction.  Immediate operands are
 * first materialized into registers and freed again if temporary.
 */
void
mme_fermi_alu_to(struct mme_builder *b,
                 struct mme_value dst,
                 enum mme_alu_op op,
                 struct mme_value x,
                 struct mme_value y)
{
   struct mme_fermi_builder *fb = &b->fermi;
   switch (op) {
   case MME_ALU_OP_SLL:
      mme_fermi_sll_to(fb, dst, x, y);
      return;
   case MME_ALU_OP_SRL:
      mme_fermi_srl_to(fb, dst, x, y);
      return;
   default:
      break;
   }
   assert(mme_fermi_is_zero_or_reg(dst));
   struct mme_value x_reg = mme_fermi_value_as_reg(b, x);
   struct mme_value y_reg = mme_fermi_value_as_reg(b, y);
   if (!mme_fermi_next_inst_can_fit_a_full_inst(fb))
      mme_fermi_new_inst(fb);
   struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb);
   inst->op = MME_FERMI_OP_ALU_REG;
   inst->alu_op = mme_to_fermi_alu_op(op);
   inst->src[0] = mme_value_alu_reg(x_reg);
   inst->src[1] = mme_value_alu_reg(y_reg);
   inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE;
   inst->dst = mme_value_alu_reg(dst);
   mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP |
                                MME_FERMI_INSTR_PART_ASSIGN);
   mme_free_reg_if_tmp(b, x, x_reg);
   mme_free_reg_if_tmp(b, y, y_reg);
}
/* dst = state[state/4 + index] via MME_FERMI_OP_STATE.
 *
 * @state is a byte offset and must be dword-aligned; the hardware takes
 * a dword index immediate plus a register offset.  NOTE(review): exact
 * STATE-op addressing semantics per the Fermi MME ISA — confirm.
 */
void mme_fermi_state_arr_to(struct mme_builder *b,
                            struct mme_value dst,
                            uint16_t state,
                            struct mme_value index)
{
   struct mme_fermi_builder *fb = &b->fermi;
   assert(mme_fermi_is_zero_or_reg(dst));
   assert(state % 4 == 0);
   struct mme_value index_reg = mme_fermi_value_as_reg(b, index);
   if (!mme_fermi_next_inst_can_fit_a_full_inst(fb))
      mme_fermi_new_inst(fb);
   struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb);
   inst->op = MME_FERMI_OP_STATE;
   inst->src[0] = mme_value_alu_reg(index_reg);
   inst->src[1] = MME_FERMI_REG_ZERO;
   inst->imm = state >> 2;
   inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE;
   inst->dst = mme_value_alu_reg(dst);
   mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP |
                                MME_FERMI_INSTR_PART_ASSIGN);
   mme_free_reg_if_tmp(b, index, index_reg);
}
/* Bitfield merge: combine @bits bits of @y (taken at @src_pos) into @x
 * at @dst_pos, result in @dst.
 *
 * NOTE(review): unlike mme_fermi_alu_to there is no immediate
 * materialization here, so @x and @y must already be registers (or
 * zero) — mme_value_alu_reg() asserts this.
 */
void
mme_fermi_merge_to(struct mme_builder *b, struct mme_value dst,
                   struct mme_value x, struct mme_value y,
                   uint16_t dst_pos, uint16_t bits, uint16_t src_pos)
{
   struct mme_fermi_builder *fb = &b->fermi;
   assert(mme_fermi_is_zero_or_reg(dst));
   assert(dst_pos < 32);
   assert(bits < 32);
   assert(src_pos < 32);
   if (!mme_fermi_next_inst_can_fit_a_full_inst(fb))
      mme_fermi_new_inst(fb);
   struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb);
   inst->op = MME_FERMI_OP_MERGE;
   inst->src[0] = mme_value_alu_reg(x);
   inst->src[1] = mme_value_alu_reg(y);
   inst->bitfield.dst_bit = dst_pos;
   inst->bitfield.src_bit = src_pos;
   inst->bitfield.size = bits;
   inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE;
   inst->dst = mme_value_alu_reg(dst);
   mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP |
                                MME_FERMI_INSTR_PART_ASSIGN);
}
/* Finalize the program and return a malloc'ed encoded blob.
 *
 * Two trailing NOPs are appended and END_NEXT is set on the
 * second-to-last instruction — presumably because the instruction after
 * END_NEXT still executes (delay-slot style); TODO confirm against the
 * ISA.  Returns NULL on allocation failure; *size_out is written only on
 * success.
 */
uint32_t *
mme_fermi_builder_finish(struct mme_fermi_builder *b, size_t *size_out)
{
   assert(b->cf_depth == 0);
   /* TODO: If there are at least two instructions and we can guarantee the
    * last two instructions get executed (not in control-flow), we don't need
    * to add a pair of NOPs.
    */
   mme_fermi_new_inst(b);
   mme_fermi_new_inst(b);
   b->insts[b->inst_count - 2].end_next = true;
   size_t enc_size = b->inst_count * sizeof(uint32_t);
   uint32_t *enc = malloc(enc_size);
   if (enc != NULL) {
      mme_fermi_encode(enc, b->inst_count, b->insts);
      *size_out = enc_size;
   }
   return enc;
}
+115
View File
@@ -0,0 +1,115 @@
#ifndef MME_BUILDER_H
#error "This file must only be included by mme_builder.h"
#endif
#include "mme_fermi.h"
#include "mme_value.h"
#include "util/bitscan.h"
#include "util/enum_operators.h"
#ifdef __cplusplus
extern "C" {
#endif
#define MME_FERMI_BUILDER_MAX_INSTS 128
enum mme_fermi_instr_parts {
MME_FERMI_INSTR_PART_OP = BITFIELD_BIT(0),
MME_FERMI_INSTR_PART_ASSIGN = BITFIELD_BIT(1)
};
/* Per-macro state for building Fermi MME programs. */
struct mme_fermi_builder {
   bool first_loaded;          /* has the R1-resident first parameter been consumed? */
   uint32_t inst_count;        /* number of instruction slots in use */
   enum mme_fermi_instr_parts inst_parts; /* parts of the current slot already packed */
   struct mme_fermi_inst insts[MME_FERMI_BUILDER_MAX_INSTS];
   uint32_t cf_depth;          /* control-flow stack depth */
   struct mme_value loop_counter; /* live counter for mme_fermi_start/end_loop */
   struct mme_cf cf_stack[8];
};
void mme_fermi_builder_init(struct mme_builder *b);
uint32_t * mme_fermi_builder_finish(struct mme_fermi_builder *b, size_t *size_out);
void mme_fermi_add_inst(struct mme_builder *b,
const struct mme_fermi_inst *inst);
/* True until the first instruction slot has been started. */
static inline bool
mme_fermi_is_empty(struct mme_fermi_builder *b)
{
   return !b->inst_count;
}
#define mme_fermi_asm(b, __inst) \
for (struct mme_fermi_inst __inst = { MME_FERMI_INST_DEFAULTS }; \
!__inst.end_next; \
mme_fermi_add_inst((b), &__inst), __inst.end_next = true)
void mme_fermi_mthd_arr(struct mme_builder *b,
uint16_t mthd,
struct mme_value index);
void mme_fermi_emit(struct mme_builder *b,
struct mme_value data);
void mme_fermi_start_loop(struct mme_builder *b,
struct mme_value count);
void mme_fermi_end_loop(struct mme_builder *b);
void mme_fermi_start_if(struct mme_builder *b,
enum mme_cmp_op op,
bool if_true,
struct mme_value x,
struct mme_value y);
void mme_fermi_end_if(struct mme_builder *b);
void mme_fermi_start_while(struct mme_builder *b);
void mme_fermi_end_while(struct mme_builder *b,
enum mme_cmp_op op,
bool if_true,
struct mme_value x,
struct mme_value y);
void mme_fermi_load_to(struct mme_builder *b,
struct mme_value dst);
struct mme_value mme_fermi_load(struct mme_builder *b);
void
mme_fermi_alu_to(struct mme_builder *b,
struct mme_value dst,
enum mme_alu_op op,
struct mme_value x,
struct mme_value y);
/* 64-bit ALU op lowered as two 32-bit ops, low half first.
 *
 * NOTE(review): the hi half runs after the lo half, so @op_hi is
 * expected to consume the carry/borrow of @op_lo (e.g. ADD/ADDC,
 * SUB/SUBB pairs).  If dst.lo aliases x.hi or y.hi the hi computation
 * would read the clobbered value — callers appear to avoid this;
 * confirm.
 */
static inline void
mme_fermi_alu64_to(struct mme_builder *b,
                   struct mme_value64 dst,
                   enum mme_alu_op op_lo,
                   enum mme_alu_op op_hi,
                   struct mme_value64 x,
                   struct mme_value64 y)
{
   assert(dst.lo.type == MME_VALUE_TYPE_REG);
   assert(dst.hi.type == MME_VALUE_TYPE_REG);
   mme_fermi_alu_to(b, dst.lo, op_lo, x.lo, y.lo);
   mme_fermi_alu_to(b, dst.hi, op_hi, x.hi, y.hi);
}
void
mme_fermi_merge_to(struct mme_builder *b, struct mme_value dst,
struct mme_value x, struct mme_value y,
uint16_t dst_pos, uint16_t bits, uint16_t src_pos);
void mme_fermi_state_arr_to(struct mme_builder *b,
struct mme_value dst,
uint16_t state,
struct mme_value index);
#ifdef __cplusplus
}
#endif