From 663258be5ebc86153b70bb235e44af9aad3cdc9d Mon Sep 17 00:00:00 2001 From: Mary Date: Thu, 5 Jan 2023 23:43:00 +0100 Subject: [PATCH] nouveau/mme: Add initial Fermi definition Part-of: --- src/nouveau/mme/meson.build | 47 +++++- src/nouveau/mme/mme_fermi.c | 281 +++++++++++++++++++++++++++++++ src/nouveau/mme/mme_fermi.h | 148 ++++++++++++++++ src/nouveau/mme/mme_fermi.xml | 219 ++++++++++++++++++++++++ src/nouveau/mme/mme_fermi_dump.c | 24 +++ 5 files changed, 713 insertions(+), 6 deletions(-) create mode 100644 src/nouveau/mme/mme_fermi.c create mode 100644 src/nouveau/mme/mme_fermi.h create mode 100644 src/nouveau/mme/mme_fermi.xml create mode 100644 src/nouveau/mme/mme_fermi_dump.c diff --git a/src/nouveau/mme/meson.build b/src/nouveau/mme/meson.build index 6b43bfd433f..50f6e515a22 100644 --- a/src/nouveau/mme/meson.build +++ b/src/nouveau/mme/meson.build @@ -18,12 +18,12 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -mme_isa_depend_files = [ +mme_tu104_isa_depend_files = [ 'mme_tu104.xml', isaspec_py_deps ] -mme_isa = custom_target( +mme_tu104_isa = custom_target( 'mme_isa', input: ['mme_tu104.xml'], output: ['mme_tu104_isa.c', 'mme_tu104_isa.h'], @@ -31,21 +31,50 @@ mme_isa = custom_target( prog_isaspec_decode, '--xml', '@INPUT@', '--out-c', '@OUTPUT0@', '--out-h', '@OUTPUT1@', ], - depend_files: mme_isa_depend_files, + depend_files: mme_tu104_isa_depend_files, ) -mme_encode_h = custom_target( +mme_tu104_encode_h = custom_target( 'mme-tu104-encode.h', input: ['mme_tu104.xml'], output: 'mme_tu104_encode.h', command: [ prog_isaspec_encode, '--xml', '@INPUT@', '--out-h', '@OUTPUT@' ], - depend_files: mme_isa_depend_files, + depend_files: mme_tu104_isa_depend_files, +) + +mme_fermi_isa_depend_files = [ + 'mme_fermi.xml', + isaspec_py_deps +] + +mme_fermi_isa = custom_target( + 'mme_fermi_isa', + input: ['mme_fermi.xml'], + output: ['mme_fermi_isa.c', 'mme_fermi_isa.h'], + command: [ + prog_isaspec_decode, 
'--xml', '@INPUT@', + '--out-c', '@OUTPUT0@', '--out-h', '@OUTPUT1@', + ], + depend_files: mme_fermi_isa_depend_files, +) + +mme_fermi_encode_h = custom_target( + 'mme-fermi-encode.h', + input: ['mme_fermi.xml'], + output: 'mme_fermi_encode.h', + command: [ + prog_isaspec_encode, '--xml', '@INPUT@', '--out-h', '@OUTPUT@' + ], + depend_files: mme_fermi_isa_depend_files, ) libnouveau_mme_files = files( 'mme_builder.h', + 'mme_fermi.c', + 'mme_fermi.h', + 'mme_fermi_dump.c', 'mme_tu104.c', 'mme_tu104.h', 'mme_tu104_builder.c', @@ -56,7 +85,13 @@ libnouveau_mme_files = files( _libnouveau_mme = static_library( 'nouveau_mme', - [libnouveau_mme_files, mme_isa, mme_encode_h], + [ + libnouveau_mme_files, + mme_fermi_isa, + mme_fermi_encode_h, + mme_tu104_isa, + mme_tu104_encode_h, + ], include_directories : [inc_include, inc_src], gnu_symbol_visibility : 'hidden', dependencies : [ diff --git a/src/nouveau/mme/mme_fermi.c b/src/nouveau/mme/mme_fermi.c new file mode 100644 index 00000000000..32a22af54df --- /dev/null +++ b/src/nouveau/mme/mme_fermi.c @@ -0,0 +1,281 @@ +#include "mme_fermi.h" +#include "mme_fermi_encode.h" + +#include "util/u_math.h" + /* Mnemonic table for enum mme_fermi_op; the designated initializers index each string by its enum value. */ +#define OP_TO_STR(OP) [MME_FERMI_OP_##OP] = #OP +static const char *op_to_str[] = { + OP_TO_STR(ALU_REG), + OP_TO_STR(ADD_IMM), + OP_TO_STR(MERGE), + OP_TO_STR(BFE_LSL_IMM), + OP_TO_STR(BFE_LSL_REG), + OP_TO_STR(STATE), + OP_TO_STR(UNK6), + OP_TO_STR(BRANCH), +}; +#undef OP_TO_STR + /* Returns the mnemonic for op. The index is only bounds-checked by assert(). NOTE(review): mme_fermi_decode() reads op from a 4-bit field, so values 8-15 would index past this 8-entry table if assert() is compiled out -- confirm callers only pass valid opcodes. */ +const char * +mme_fermi_op_to_str(enum mme_fermi_op op) +{ + assert(op < ARRAY_SIZE(op_to_str)); + return op_to_str[op]; +} + /* Mnemonic table for enum mme_fermi_alu_op, indexed by enum value in the same way. */ +#define ALU_OP_TO_STR(OP) [MME_FERMI_ALU_OP_##OP] = #OP +static const char *alu_op_to_str[] = { + ALU_OP_TO_STR(ADD), + ALU_OP_TO_STR(ADDC), + ALU_OP_TO_STR(SUB), + ALU_OP_TO_STR(SUBB), + ALU_OP_TO_STR(RESERVED4), + ALU_OP_TO_STR(RESERVED5), + ALU_OP_TO_STR(RESERVED6), + ALU_OP_TO_STR(RESERVED7), + ALU_OP_TO_STR(XOR), + ALU_OP_TO_STR(OR), + ALU_OP_TO_STR(AND), + ALU_OP_TO_STR(AND_NOT), + ALU_OP_TO_STR(NAND), + 
ALU_OP_TO_STR(RESERVED13), + ALU_OP_TO_STR(RESERVED14), + ALU_OP_TO_STR(RESERVED15), + ALU_OP_TO_STR(RESERVED16), + ALU_OP_TO_STR(RESERVED17), + ALU_OP_TO_STR(RESERVED18), + ALU_OP_TO_STR(RESERVED19), + ALU_OP_TO_STR(RESERVED20), + ALU_OP_TO_STR(RESERVED21), + ALU_OP_TO_STR(RESERVED22), + ALU_OP_TO_STR(RESERVED23), + ALU_OP_TO_STR(RESERVED24), + ALU_OP_TO_STR(RESERVED25), + ALU_OP_TO_STR(RESERVED26), + ALU_OP_TO_STR(RESERVED27), + ALU_OP_TO_STR(RESERVED28), + ALU_OP_TO_STR(RESERVED29), + ALU_OP_TO_STR(RESERVED30), + ALU_OP_TO_STR(RESERVED31), +}; +#undef ALU_OP_TO_STR + /* Returns the mnemonic for an ALU opcode; asserts that op indexes alu_op_to_str. */ +const char * +mme_fermi_alu_op_to_str(enum mme_fermi_alu_op op) +{ + assert(op < ARRAY_SIZE(alu_op_to_str)); + return alu_op_to_str[op]; +} + /* Mnemonic table for enum mme_fermi_assign_op, indexed by enum value. */ +#define ASSIGN_OP_TO_STR(OP) [MME_FERMI_ASSIGN_OP_##OP] = #OP +static const char *assign_op_to_str[] = { + ASSIGN_OP_TO_STR(LOAD), + ASSIGN_OP_TO_STR(MOVE), + ASSIGN_OP_TO_STR(MOVE_SET_MADDR), + ASSIGN_OP_TO_STR(LOAD_EMIT), + ASSIGN_OP_TO_STR(MOVE_EMIT), + ASSIGN_OP_TO_STR(LOAD_SET_MADDR), + ASSIGN_OP_TO_STR(MOVE_SET_MADDR_LOAD_EMIT), + ASSIGN_OP_TO_STR(MOVE_SET_MADDR_LOAD_EMIT_HIGH), +}; +#undef ASSIGN_OP_TO_STR + /* Returns the mnemonic for an assign opcode; asserts that op indexes assign_op_to_str. */ +const char * +mme_fermi_assign_op_to_str(enum mme_fermi_assign_op op) +{ + assert(op < ARRAY_SIZE(assign_op_to_str)); + return assign_op_to_str[op]; +} + /* Encodes inst_count instructions from insts into out, one 32-bit word each: word 0 of the bitmask returned by encode__instruction(). */ +void mme_fermi_encode(uint32_t *out, uint32_t inst_count, + const struct mme_fermi_inst *insts) +{ + for (uint32_t i = 0; i < inst_count; i++) { + bitmask_t enc = encode__instruction(NULL, NULL, insts[i]); + out[i] = enc.bitset[0]; + } +} + /* Extracts the field that starts at bit low and ends at bit high (high >= low is asserted): shifts the bitmask right by low, ANDs it with a mask built by BITSET_SET_RANGE(mask, 0, high - low) and, when is_signed, sign-extends the result from high - low + 1 bits. */ +static uint64_t +unpack_field(bitmask_t bitmask, unsigned low, unsigned high, bool is_signed) +{ + bitmask_t field, mask; + + assert(high >= low); + + BITSET_ZERO(mask.bitset); + BITSET_SET_RANGE(mask.bitset, 0, high - low); + + BITSET_COPY(field.bitset, bitmask.bitset); + BITSET_SHR(field.bitset, low); + BITSET_AND(field.bitset, field.bitset, mask.bitset); + + uint64_t data = bitmask_to_uint64_t(field); + if (is_signed) + data = util_sign_extend(data, high 
- low + 1); + + return data; +} + /* Decodes inst_count 32-bit words from in into insts. Common fields: op = bits 0-3, end_next = bit 7, dst = bits 8-10; assign_op = bits 4-6 for every op except BRANCH. The remaining fields depend on op, and fields that do not apply to an op are left unwritten. */ +void mme_fermi_decode(struct mme_fermi_inst *insts, + const uint32_t *in, uint32_t inst_count) +{ + for (uint32_t i = 0; i < inst_count; i++) { + bitmask_t enc = { .bitset = { in[i] }}; + + insts[i].op = unpack_field(enc, 0, 3, false); + insts[i].end_next = unpack_field(enc, 7, 7, false); + insts[i].dst = unpack_field(enc, 8, 10, false); + + if (insts[i].op != MME_FERMI_OP_BRANCH) { + insts[i].assign_op = unpack_field(enc, 4, 6, false); + } + /* Per-op operand layout. Immediates are stored as the raw 18-bit field (no sign extension here; print_imm() sign-extends when printing). */ + if (insts[i].op == MME_FERMI_OP_ALU_REG) { + insts[i].src[0] = unpack_field(enc, 11, 13, false); + insts[i].src[1] = unpack_field(enc, 14, 16, false); + insts[i].alu_op = unpack_field(enc, 17, 21, false); + } else if (insts[i].op == MME_FERMI_OP_ADD_IMM || + insts[i].op == MME_FERMI_OP_STATE) { + insts[i].src[0] = unpack_field(enc, 11, 13, false); + insts[i].imm = unpack_field(enc, 14, 31, false); + } else if (insts[i].op == MME_FERMI_OP_MERGE || + insts[i].op == MME_FERMI_OP_BFE_LSL_IMM || + insts[i].op == MME_FERMI_OP_BFE_LSL_REG) { + insts[i].src[0] = unpack_field(enc, 11, 13, false); + insts[i].src[1] = unpack_field(enc, 14, 16, false); + insts[i].bitfield.src_bit = unpack_field(enc, 17, 21, false); + insts[i].bitfield.size = unpack_field(enc, 22, 26, false); + insts[i].bitfield.dst_bit = unpack_field(enc, 27, 31, false); + } else if (insts[i].op == MME_FERMI_OP_BRANCH) { + insts[i].branch.not_zero = unpack_field(enc, 4, 4, false); + insts[i].branch.no_delay = unpack_field(enc, 5, 5, false); + insts[i].src[0] = unpack_field(enc, 11, 13, false); + insts[i].imm = unpack_field(enc, 14, 31, false); + } + } +} + /* Writes the indent string to fp depth times. */ +static void +print_indent(FILE *fp, unsigned depth) +{ + for (unsigned i = 0; i < depth; i++) + fprintf(fp, " "); +} + /* Prints a register operand: $zero for MME_FERMI_REG_ZERO, otherwise $r followed by the register number. */ +static void +print_reg(FILE *fp, enum mme_fermi_reg reg) +{ + if (reg == MME_FERMI_REG_ZERO) { + fprintf(fp, " $zero"); + } else { + fprintf(fp, " $r%u", (unsigned)reg); + } +} + /* Prints inst->imm, sign-extended from 18 bits, in decimal and in hex. */ +static void +print_imm(FILE *fp, const struct mme_fermi_inst *inst) +{ + int32_t imm 
= util_mask_sign_extend(inst->imm, 18); + + fprintf(fp, " %d /* 0x%04x */", (int)imm, (unsigned)imm); +} + /* Prints one instruction at the given indent: first the operation and its operands, then (for every op except BRANCH) a second line with the assign op and destination register, then an END_NEXT line when inst->end_next is set. */ +void +mme_fermi_print_inst(FILE *fp, unsigned indent, + const struct mme_fermi_inst *inst) +{ + print_indent(fp, indent); + + switch (inst->op) { + case MME_FERMI_OP_ALU_REG: + fprintf(fp, "%s", mme_fermi_alu_op_to_str(inst->alu_op)); + print_reg(fp, inst->src[0]); + print_reg(fp, inst->src[1]); + + if (inst->alu_op == MME_FERMI_ALU_OP_ADDC) { + fprintf(fp, " $carry"); + } else if (inst->alu_op == MME_FERMI_ALU_OP_SUBB) { + fprintf(fp, " $borrow"); + } + break; + case MME_FERMI_OP_ADD_IMM: + case MME_FERMI_OP_STATE: + fprintf(fp, "%s", mme_fermi_op_to_str(inst->op)); + print_reg(fp, inst->src[0]); + print_imm(fp, inst); + break; + case MME_FERMI_OP_MERGE: { /* braces give the locals their own scope: before C23 a declaration may not directly follow a case label */ + uint32_t src_bit = inst->bitfield.src_bit; + uint32_t size = inst->bitfield.size; + uint32_t dst_bit = inst->bitfield.dst_bit; + fprintf(fp, "%s", mme_fermi_op_to_str(inst->op)); + print_reg(fp, inst->src[0]); + print_reg(fp, inst->src[1]); + fprintf(fp, " (%u, %u, %u)", src_bit, size, dst_bit); + break; + } + case MME_FERMI_OP_BFE_LSL_IMM: + fprintf(fp, "%s", mme_fermi_op_to_str(inst->op)); + print_reg(fp, inst->src[0]); + print_reg(fp, inst->src[1]); + fprintf(fp, " (%u, %u)", inst->bitfield.dst_bit, + inst->bitfield.size); + break; + case MME_FERMI_OP_BFE_LSL_REG: + fprintf(fp, "%s", mme_fermi_op_to_str(inst->op)); + print_reg(fp, inst->src[0]); + print_reg(fp, inst->src[1]); + fprintf(fp, " (%u, %u)", inst->bitfield.src_bit, + inst->bitfield.size); + break; + case MME_FERMI_OP_BRANCH: + if (inst->branch.not_zero) { + fprintf(fp, "BNZ"); + } else { + fprintf(fp, "BZ"); + } + print_reg(fp, inst->src[0]); + print_imm(fp, inst); + + if (inst->branch.no_delay) { + fprintf(fp, " NO_DELAY"); + } + + break; + default: + fprintf(fp, "%s", mme_fermi_op_to_str(inst->op)); + break; + } + /* Branches carry no assign op or destination, so the second line is skipped for them. */ + if (inst->op != MME_FERMI_OP_BRANCH) { + fprintf(fp, "\n"); + print_indent(fp, indent); + + fprintf(fp, "%s", 
mme_fermi_assign_op_to_str(inst->assign_op)); + print_reg(fp, inst->dst); + + if (inst->assign_op != MME_FERMI_ASSIGN_OP_LOAD) { + fprintf(fp, " $scratch"); + } + } + + if (inst->end_next) { + fprintf(fp, "\n"); + print_indent(fp, indent); + fprintf(fp, "END_NEXT"); + } + + fprintf(fp, "\n"); + +} + /* Prints inst_count instructions, each preceded by a line holding its index and indented one level. */ +void +mme_fermi_print(FILE *fp, const struct mme_fermi_inst *insts, + uint32_t inst_count) +{ + for (uint32_t i = 0; i < inst_count; i++) { + fprintf(fp, "%u:\n", i); + mme_fermi_print_inst(fp, 1, &insts[i]); + } +} diff --git a/src/nouveau/mme/mme_fermi.h b/src/nouveau/mme/mme_fermi.h new file mode 100644 index 00000000000..a7d2d501b14 --- /dev/null +++ b/src/nouveau/mme/mme_fermi.h @@ -0,0 +1,148 @@ +#ifndef MME_FERMI_H +#define MME_FERMI_H + +#include +#include +#include + +#include "util/macros.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +#define MME_FERMI_DRAM_COUNT 0xc00 +#define MME_FERMI_SCRATCH_COUNT 128 + /* Register operand values; print_reg() shows value 0 as $zero and the others as $r followed by the number. */ +enum PACKED mme_fermi_reg { + MME_FERMI_REG_ZERO, + MME_FERMI_REG_R1, + MME_FERMI_REG_R2, + MME_FERMI_REG_R3, + MME_FERMI_REG_R4, + MME_FERMI_REG_R5, + MME_FERMI_REG_R6, + MME_FERMI_REG_R7, +}; + /* Opcode of an instruction; mme_fermi_decode() reads it from bits 0-3 of the instruction word. */ +enum PACKED mme_fermi_op { + MME_FERMI_OP_ALU_REG, + MME_FERMI_OP_ADD_IMM, + MME_FERMI_OP_MERGE, + MME_FERMI_OP_BFE_LSL_IMM, + MME_FERMI_OP_BFE_LSL_REG, + MME_FERMI_OP_STATE, + MME_FERMI_OP_UNK6, + MME_FERMI_OP_BRANCH, +}; + +const char *mme_fermi_op_to_str(enum mme_fermi_op op); + /* ALU sub-opcode of ALU_REG instructions; mme_fermi_decode() reads it from bits 17-21. */ +enum PACKED mme_fermi_alu_op { + MME_FERMI_ALU_OP_ADD, + MME_FERMI_ALU_OP_ADDC, + MME_FERMI_ALU_OP_SUB, + MME_FERMI_ALU_OP_SUBB, + MME_FERMI_ALU_OP_RESERVED4, + MME_FERMI_ALU_OP_RESERVED5, + MME_FERMI_ALU_OP_RESERVED6, + MME_FERMI_ALU_OP_RESERVED7, + MME_FERMI_ALU_OP_XOR, + MME_FERMI_ALU_OP_OR, + MME_FERMI_ALU_OP_AND, + MME_FERMI_ALU_OP_AND_NOT, + MME_FERMI_ALU_OP_NAND, + MME_FERMI_ALU_OP_RESERVED13, + MME_FERMI_ALU_OP_RESERVED14, + MME_FERMI_ALU_OP_RESERVED15, + MME_FERMI_ALU_OP_RESERVED16, + MME_FERMI_ALU_OP_RESERVED17, + MME_FERMI_ALU_OP_RESERVED18, + 
MME_FERMI_ALU_OP_RESERVED19, + MME_FERMI_ALU_OP_RESERVED20, + MME_FERMI_ALU_OP_RESERVED21, + MME_FERMI_ALU_OP_RESERVED22, + MME_FERMI_ALU_OP_RESERVED23, + MME_FERMI_ALU_OP_RESERVED24, + MME_FERMI_ALU_OP_RESERVED25, + MME_FERMI_ALU_OP_RESERVED26, + MME_FERMI_ALU_OP_RESERVED27, + MME_FERMI_ALU_OP_RESERVED28, + MME_FERMI_ALU_OP_RESERVED29, + MME_FERMI_ALU_OP_RESERVED30, + MME_FERMI_ALU_OP_RESERVED31, +}; + +const char *mme_fermi_alu_op_to_str(enum mme_fermi_alu_op op); + + /* Assign-op field of non-branch instructions; mme_fermi_decode() reads it from bits 4-6. */ +enum PACKED mme_fermi_assign_op { + MME_FERMI_ASSIGN_OP_LOAD, + MME_FERMI_ASSIGN_OP_MOVE, + MME_FERMI_ASSIGN_OP_MOVE_SET_MADDR, + MME_FERMI_ASSIGN_OP_LOAD_EMIT, + MME_FERMI_ASSIGN_OP_MOVE_EMIT, + MME_FERMI_ASSIGN_OP_LOAD_SET_MADDR, + MME_FERMI_ASSIGN_OP_MOVE_SET_MADDR_LOAD_EMIT, + MME_FERMI_ASSIGN_OP_MOVE_SET_MADDR_LOAD_EMIT_HIGH, +}; + +const char *mme_fermi_assign_op_to_str(enum mme_fermi_assign_op op); + /* Bit-field operands of MERGE, BFE_LSL_IMM and BFE_LSL_REG; each is decoded from a 5-bit field. */ +struct mme_fermi_bitfield { + uint8_t src_bit; + uint8_t size; + uint8_t dst_bit; +}; + /* BRANCH flags; mme_fermi_decode() reads them from bits 4 and 5 of the instruction word. */ +struct mme_fermi_branch { + bool not_zero; + bool no_delay; +}; + /* Decoded form of one instruction word. Which union member is meaningful depends on op: alu_op for ALU_REG, bitfield for MERGE and BFE_LSL_*, branch for BRANCH. */ +struct mme_fermi_inst { + bool end_next; + enum mme_fermi_assign_op assign_op; + enum mme_fermi_op op; + enum mme_fermi_reg dst; + enum mme_fermi_reg src[2]; + int32_t imm; + union { + enum mme_fermi_alu_op alu_op; + struct mme_fermi_bitfield bitfield; + struct mme_fermi_branch branch; + }; +}; + /* List of designated initializers giving default field values, for use inside a struct mme_fermi_inst initializer. */ +#define MME_FERMI_INST_DEFAULTS \ + .end_next = false, \ + .assign_op = MME_FERMI_ASSIGN_OP_MOVE, \ + .op = MME_FERMI_OP_ALU_REG, \ + .dst = MME_FERMI_REG_ZERO, \ + .src = { \ + MME_FERMI_REG_ZERO, \ + MME_FERMI_REG_ZERO \ + }, \ + .imm = 0, \ + .alu_op = MME_FERMI_ALU_OP_ADD, \ + +void mme_fermi_print_inst(FILE *fp, unsigned indent, + const struct mme_fermi_inst *inst); + /* Prints inst_count instructions from insts to fp. */ +void mme_fermi_print(FILE *fp, const struct mme_fermi_inst *insts, + uint32_t inst_count); + /* Encodes inst_count instructions from insts into out, one 32-bit word per instruction. */ +void mme_fermi_encode(uint32_t *out, uint32_t inst_count, + const struct mme_fermi_inst *insts); + /* Decodes inst_count 32-bit words from in into insts. */ +void mme_fermi_decode(struct mme_fermi_inst *insts, + const uint32_t *in, uint32_t 
inst_count); + +void mme_fermi_dump(FILE *fp, uint32_t *encoded, size_t encoded_size); + +#ifdef __cplusplus +} +#endif + +#endif /* MME_FERMI_H */ diff --git a/src/nouveau/mme/mme_fermi.xml b/src/nouveau/mme/mme_fermi.xml new file mode 100644 index 00000000000..0854745e24c --- /dev/null +++ b/src/nouveau/mme/mme_fermi.xml @@ -0,0 +1,219 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {NAME} {ASSIGN_OP} {ALU_OP} {DST} {SRC0} {SRC1} + + + + x + + + + + + + src.assign_op + src.alu_op + src.dst + src.src[0] + src.src[1] + + + + + + {NAME} {ASSIGN_OP} {DST} {SRC0} {IMM} + + + + x + + + + + + src.assign_op + src.dst + src.src[0] + src.imm + + + + + + {NAME} {ASSIGN_OP} {DST} {SRC0} {SRC1} {BF_SRC_BIT} {BF_SIZE} {BF_DST_BIT} + + + + x + + + + + + + + + src.assign_op + src.dst + src.src[0] + src.src[1] + src.bitfield.src_bit + src.bitfield.size + src.bitfield.dst_bit + + + + + + {NO_DELAY} B{NOT_ZERO} {SRC0} {IMM} + + + + + xxxxx + + + + + src.branch.not_zero + src.branch.no_delay + src.src[0] + src.imm + + + + + + Encoding of a NVIDIA Fermi Macro Method instruction. All instructions are 32b. 
+ + + {END_NEXT} {OP} {ALU_OP_ENCODING} {SRC0_IMM_ENCODING} {BF_ENCODING} {BRANCH_ENCODING} + + + xxx + + xxxxxxxxxxxxxxxxxxxxxxxx + + + {OP} == 0 + + + + + {OP} == 1 || {OP} == 5 + + + + + {OP} == 2 || {OP} == 3 || {OP} == 4 + + + + + {OP} == 7 + + + + + src.end_next + src.op + src + src + src + src + + + diff --git a/src/nouveau/mme/mme_fermi_dump.c b/src/nouveau/mme/mme_fermi_dump.c new file mode 100644 index 00000000000..69bdd9c6e0d --- /dev/null +++ b/src/nouveau/mme/mme_fermi_dump.c @@ -0,0 +1,24 @@ +#include "mme_fermi.h" + +#include "mme_fermi_isa.h" +#include "isa.h" + +#include + +static void +disasm_instr_cb(void *d, unsigned n, void *instr) +{ + fprintf(d, "%3d[%08x]", n, *(uint32_t *)instr); +} + +void +mme_fermi_dump(FILE *fp, uint32_t *encoded, size_t encoded_size) +{ + const struct isa_decode_options opts = { + .show_errors = true, + .branch_labels = true, + .cbdata = fp, + .pre_instr_cb = disasm_instr_cb, + }; + isa_disasm(encoded, encoded_size, fp, &opts); +}