diff --git a/src/freedreno/.gitlab-ci/reference/afuc_test.asm b/src/freedreno/.gitlab-ci/reference/afuc_test.asm index c4e37572473..9fca2106422 100644 --- a/src/freedreno/.gitlab-ci/reference/afuc_test.asm +++ b/src/freedreno/.gitlab-ci/reference/afuc_test.asm @@ -1,141 +1,150 @@ ; a6xx microcode ; Version: 01000001 - [01000001] ; nop - [01000078] ; nop - mov $01, 0x0830 ; CP_SQE_INSTR_BASE - mov $02, 0x0002 - cwrite $01, [$00 + @REG_READ_ADDR], 0x0 - cwrite $02, [$00 + @REG_READ_DWORDS], 0x0 - mov $01, $regdata - mov $02, $regdata - add $01, $01, 0x0004 - addhi $02, $02, 0x0000 - mov $03, 0x0001 - cwrite $01, [$00 + @MEM_READ_ADDR], 0x0 - cwrite $02, [$00 + @MEM_READ_ADDR+0x1], 0x0 - cwrite $03, [$00 + @MEM_READ_DWORDS], 0x0 - rot $04, $memdata, 0x0008 - ushr $04, $04, 0x0006 - sub $04, $04, 0x0004 - add $01, $01, $04 - addhi $02, $02, 0x0000 - mov $rem, 0x0080 - cwrite $01, [$00 + @MEM_READ_ADDR], 0x0 - cwrite $02, [$00 + @MEM_READ_ADDR+0x1], 0x0 - cwrite $02, [$00 + @LOAD_STORE_HI], 0x0 - cwrite $rem, [$00 + @MEM_READ_DWORDS], 0x0 - cwrite $00, [$00 + @PACKET_TABLE_WRITE_ADDR], 0x0 - (rep)cwrite $memdata, [$00 + @PACKET_TABLE_WRITE], 0x0 - mov $02, 0x0883 ; CP_SCRATCH[0].REG - mov $03, 0xbeef - mov $04, 0xdead << 16 - or $03, $03, $04 - cwrite $02, [$00 + @REG_WRITE_ADDR], 0x0 - cwrite $03, [$00 + @REG_WRITE], 0x0 - waitin - mov $01, $data +[01000001] +[01000078] +mov $01, 0x830 ; CP_SQE_INSTR_BASE +mov $02, 0x2 +cwrite $01, [$00 + @REG_READ_ADDR], 0x0 +cwrite $02, [$00 + @REG_READ_DWORDS], 0x0 +mov $01, $regdata +mov $02, $regdata +add $01, $01, 0x4 +addhi $02, $02, 0x0 +mov $03, 0x1 +cwrite $01, [$00 + @MEM_READ_ADDR], 0x0 +cwrite $02, [$00 + @MEM_READ_ADDR+0x1], 0x0 +cwrite $03, [$00 + @MEM_READ_DWORDS], 0x0 +rot $04, $memdata, 0x8 +ushr $04, $04, 0x6 +sub $04, $04, 0x4 +add $01, $01, $04 +addhi $02, $02, 0x0 +mov $rem, 0x80 +cwrite $01, [$00 + @MEM_READ_ADDR], 0x0 +cwrite $02, [$00 + @MEM_READ_ADDR+0x1], 0x0 +cwrite $02, [$00 + @LOAD_STORE_HI], 0x0 +cwrite $rem, [$00 + @MEM_READ_DWORDS], 0x0 +cwrite $00, [$00 + @PACKET_TABLE_WRITE_ADDR], 0x0 +(rep)cwrite $memdata, [$00 + @PACKET_TABLE_WRITE], 0x0 +mov $02, 0x883 ; CP_SCRATCH[0].REG +mov $03, 0xbeef +mov $04, 0xdead << 16 +or $03, $03, $04 +cwrite $02, [$00 + @REG_WRITE_ADDR], 0x0 +cwrite $03, [$00 + @REG_WRITE], 0x0 +waitin +mov $01, $data CP_ME_INIT: - mov $02, 0x0002 - waitin - mov $01, $data +mov $02, 0x2 +waitin +mov $01, $data CP_MEM_WRITE: - mov $addr, 0x00a0 << 24 ; |NRT_ADDR - mov $02, 0x0004 - (xmov1)add $data, $02, $data - mov $addr, 0xa204 << 16 ; |NRT_DATA - (rep)(xmov3)mov $data, $data - waitin - mov $01, $data +mov $addr, 0xa0 << 24 ; |NRT_ADDR +mov $02, 0x4 +(xmov1)add $data, $02, $data +mov $addr, 0xa204 << 16 ; |NRT_DATA +(rep)(xmov3)mov $data, $data +waitin +mov $01, $data CP_SCRATCH_WRITE: - mov $02, 0x00ff - (rep)cwrite $data, [$02 + 0x001], 0x4 - waitin - mov $01, $data +mov $02, 0xff +(rep)cwrite $data, [$02 + @RB_RPTR], 0x4 +waitin +mov $01, $data CP_SET_SECURE_MODE: - mov $02, $data - setsecure $02, #l000 - l001: jump #l001 - nop - l000: waitin - mov $01, $data -fxn00: - l004: cmp $04, $02, $03 - breq $04, b0, #l002 - brne $04, b1, #l003 - breq $04, b2, #l004 - sub $03, $03, $02 - l003: jump #l004 - sub $02, $02, $03 - l002: ret - nop +mov $02, $data +setsecure $02, #l52 +l50: +jump #l50 +nop +l52: +waitin +mov $01, $data + +fxn54: +l54: +cmp $04, $02, $03 +breq $04, b0, #l61 +brne $04, b1, #l59 +breq $04, b2, #l54 +sub $03, $03, $02 +l59: +jump #l54 +sub $02, $02, $03 +l61: +ret +nop CP_REG_RMW: - cwrite $data, [$00 + @REG_READ_ADDR], 0x0 - add $02, $regdata, 0x0042 - addhi $03, $00, $regdata - sub $02, $02, $regdata - call #fxn00 - subhi $03, $03, $regdata - and $02, $02, $regdata - or $02, $02, 0x0001 - xor $02, $02, 0x0001 - not $02, $02 - shl $02, $02, $regdata - ushr $02, $02, $regdata - ishr $02, $02, $regdata - rot $02, $02, $regdata - min $02, $02, $regdata - max $02, $02, $regdata - mul8 $02, $02, $regdata - msb $02, $02 - mov $usraddr, $data - mov $data, $02 - waitin - mov $01, $data +cwrite $data, [$00 + @REG_READ_ADDR], 0x0 +add $02, $regdata, 0x42 +addhi $03, $00, $regdata +sub $02, $02, $regdata +call #fxn54 +subhi $03, $03, $regdata +and $02, $02, $regdata +or $02, $02, 0x1 +xor $02, $02, 0x1 +not $02, $02 +shl $02, $02, $regdata +ushr $02, $02, $regdata +ishr $02, $02, $regdata +rot $02, $02, $regdata +min $02, $02, $regdata +max $02, $02, $regdata +mul8 $02, $02, $regdata +msb $02, $02 +mov $usraddr, $data +mov $data, $02 +waitin +mov $01, $data CP_MEMCPY: - mov $02, $data - mov $03, $data - mov $04, $data - mov $05, $data - mov $06, $data - l006: breq $06, 0x0, #l005 - cwrite $03, [$00 + @LOAD_STORE_HI], 0x0 - load $07, [$02 + 0x004], 0x4 - cwrite $05, [$00 + @LOAD_STORE_HI], 0x0 - jump #l006 - store $07, [$04 + 0x004], 0x4 - l005: waitin - mov $01, $data +mov $02, $data +mov $03, $data +mov $04, $data +mov $05, $data +mov $06, $data +l90: +breq $06, 0x0, #l96 +cwrite $03, [$00 + @LOAD_STORE_HI], 0x0 +load $07, [$02 + 0x4], 0x4 +cwrite $05, [$00 + @LOAD_STORE_HI], 0x0 +jump #l90 +store $07, [$04 + 0x4], 0x4 +l96: +waitin +mov $01, $data CP_MEM_TO_MEM: - cwrite $data, [$00 + @MEM_READ_ADDR], 0x0 - cwrite $data, [$00 + @MEM_READ_ADDR+0x1], 0x0 - mov $02, $data - cwrite $data, [$00 + @LOAD_STORE_HI], 0x0 - mov $rem, $data - cwrite $rem, [$00 + @MEM_READ_DWORDS], 0x0 - (rep)store $memdata, [$02 + 0x004], 0x4 - waitin - mov $01, $data +cwrite $data, [$00 + @MEM_READ_ADDR], 0x0 +cwrite $data, [$00 + @MEM_READ_ADDR+0x1], 0x0 +mov $02, $data +cwrite $data, [$00 + @LOAD_STORE_HI], 0x0 +mov $rem, $data +cwrite $rem, [$00 + @MEM_READ_DWORDS], 0x0 +(rep)store $memdata, [$02 + 0x4], 0x4 +waitin +mov $01, $data IN_PREEMPT: - cread $02, [$00 + 0x101], 0x0 - brne $02, 0x1, #l007 - nop - preemptleave #l001 - nop - nop - nop - waitin - mov $01, $data - l007: iret - nop +cread $02, [$00 + 0x101], 0x0 +brne $02, 0x1, #l116 +nop +preemptleave #l50 +nop +nop +nop +waitin +mov $01, $data +l116: +iret +nop UNKN0: UNKN1: @@ -257,133 +266,133 @@ UNKN124: UNKN125: UNKN126: UNKN127: - waitin - mov $01, $data - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [0000006b] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [0000003f] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000025] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000022] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [0000002c] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000030] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000062] ; nop - [00000076] ; nop - [00000055] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop - [00000076] ; nop +waitin +mov $01, $data +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[0000006b] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[0000003f] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000025] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000022] +[00000076] +[00000076] +[00000076] +[0000002c] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000030] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000062] +[00000076] +[00000055] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] +[00000076] diff --git a/src/freedreno/afuc/afuc.h b/src/freedreno/afuc/afuc.h index 059b24cef3e..2ea26003770 100644 --- a/src/freedreno/afuc/afuc.h +++ b/src/freedreno/afuc/afuc.h @@ -38,38 +38,36 @@ if flag set, copy cmdstream bo contents into RB instead of IB'ing to it from RB. */ -/* The opcode is encoded variable length. Opcodes less than 0x30 - * are encoded as 5 bits followed by (rep) flag. Opcodes >= 0x30 - * (ie. top two bits are '11' are encoded as 6 bits. See get_opc() - */ typedef enum { - OPC_NOP = 0x00, + OPC_NOP, - OPC_ADD = 0x01, /* add immediate */ - OPC_ADDHI = 0x02, /* add immediate (hi 32b of 64b) */ - OPC_SUB = 0x03, /* subtract immediate */ - OPC_SUBHI = 0x04, /* subtract immediate (hi 32b of 64b) */ - OPC_AND = 0x05, /* AND immediate */ - OPC_OR = 0x06, /* OR immediate */ - OPC_XOR = 0x07, /* XOR immediate */ - OPC_NOT = 0x08, /* bitwise not of immed (src1 ignored) */ - OPC_SHL = 0x09, /* shift-left immediate */ - OPC_USHR = 0x0a, /* unsigned shift right by immediate */ - OPC_ISHR = 0x0b, /* signed shift right by immediate */ - OPC_ROT = 0x0c, /* rotate left (left shift with wrap-around) */ - OPC_MUL8 = 0x0d, /* 8bit multiply by immediate */ - OPC_MIN = 0x0e, - OPC_MAX = 0x0f, - OPC_CMP = 0x10, /* compare src to immed */ - OPC_MOVI = 0x11, /* move immediate */ +#define ALU(name) \ + OPC_##name, \ + OPC_##name##I, + ALU(ADD) /* add immediate */ + ALU(ADDHI) /* add immediate (hi 32b of 64b) */ + ALU(SUB) /* subtract immediate */ + ALU(SUBHI) /* subtract immediate (hi 32b of 64b) */ + ALU(AND) /* AND immediate */ + ALU(OR) /* OR immediate */ + ALU(XOR) /* XOR immediate */ + ALU(NOT) /* bitwise not of immed (src1 ignored) */ + ALU(SHL) /* shift-left immediate */ + ALU(USHR) /* unsigned shift right by immediate */ + ALU(ISHR) /* signed shift right by immediate */ + ALU(ROT) /* rotate left (left shift with wrap-around) */ + ALU(MUL8) /* 8bit multiply by immediate */ + ALU(MIN) + ALU(MAX) + ALU(CMP) /* compare src to immed */ + OPC_MOVI, /* move immediate */ +#undef ALU /* Return the most-significant bit of src2, or 0 if src2 == 0 (the * same as if src2 == 1). src1 is ignored. Note that this overlaps - * with STORE6, so it can only be used with the two-source encoding. + * with STORE, so it can only be used with the two-source encoding. */ - OPC_MSB = 0x14, - - OPC_ALU = 0x13, /* ALU instruction with two src registers */ + OPC_MSB, /* These seem something to do with setting some external state.. * doesn't seem to map *directly* to registers, but I guess that @@ -90,26 +88,31 @@ typedef enum { * 0x0b22->0x0b24 (IB2). Presumably $05 ends up w/ different value * for RB->IB1 vs IB1->IB2. */ - OPC_CWRITE5 = 0x15, - OPC_CREAD5 = 0x16, + OPC_CWRITE, + OPC_CREAD, - /* A6xx shuffled around the cwrite/cread opcodes and added new opcodes - * that let you read/write directly to memory (and bypass the IOMMU?). + /* A6xx added new opcodes that let you read/write directly to memory (and + * bypass the IOMMU?). */ - OPC_STORE6 = 0x14, - OPC_CWRITE6 = 0x15, - OPC_LOAD6 = 0x16, - OPC_CREAD6 = 0x17, + OPC_STORE, + OPC_LOAD, - OPC_BRNEI = 0x30, /* relative branch (if $src != immed) */ - OPC_BREQI = 0x31, /* relative branch (if $src == immed) */ - OPC_BRNEB = 0x32, /* relative branch (if bit not set) */ - OPC_BREQB = 0x33, /* relative branch (if bit is set) */ - OPC_RET = 0x34, /* return */ - OPC_CALL = 0x35, /* "function" call */ - OPC_WIN = 0x36, /* wait for input (ie. wait for WPTR to advance) */ - OPC_PREEMPTLEAVE6 = 0x38, /* try to leave preemption */ - OPC_SETSECURE = 0x3b, /* switch secure mode on/off */ + OPC_BRNEI, /* relative branch (if $src != immed) */ + OPC_BREQI, /* relative branch (if $src == immed) */ + OPC_BRNEB, /* relative branch (if bit not set) */ + OPC_BREQB, /* relative branch (if bit is set) */ + OPC_RET, /* return */ + OPC_IRET, /* return from preemption interrupt handler */ + OPC_CALL, /* "function" call */ + OPC_WAITIN, /* wait for input (ie. wait for WPTR to advance) */ + OPC_PREEMPTLEAVE, /* try to leave preemption */ + OPC_SETSECURE, /* switch secure mode on/off */ + + /* pseudo-opcodes without an actual encoding */ + OPC_BREQ, + OPC_BRNE, + OPC_JUMP, + OPC_RAW_LITERAL, } afuc_opc; /** @@ -141,97 +144,27 @@ typedef enum { REG_DATA = 0x1f, } afuc_reg; -typedef union PACKED { - /* addi, subi, andi, ori, xori, etc: */ - struct PACKED { - uint32_t uimm : 16; - uint32_t dst : 5; - uint32_t src : 5; - uint32_t hdr : 6; - } alui; - struct PACKED { - uint32_t uimm : 16; - uint32_t dst : 5; - uint32_t shift : 5; - uint32_t hdr : 6; - } movi; - struct PACKED { - uint32_t alu : 5; - uint32_t pad : 4; - uint32_t xmov : 2; /* execute eXtra mov's based on $rem */ - uint32_t dst : 5; - uint32_t src2 : 5; - uint32_t src1 : 5; - uint32_t hdr : 6; - } alu; - struct PACKED { - uint32_t uimm : 12; - /* TODO this needs to be confirmed: - * - * flags: - * 0x4 - post-increment src2 by uimm (need to confirm this is also - * true for load/cread). TBD whether, when used in conjunction - * with @LOAD_STORE_HI, 32b rollover works properly. - * - * other values tbd, also need to confirm if different bits can be - * set together (I don't see examples of this in existing fw) - */ - uint32_t flags : 4; - uint32_t src1 : 5; /* dst (cread) or src (cwrite) register */ - uint32_t src2 : 5; /* read or write address is src2+uimm */ - uint32_t hdr : 6; - } control; - struct PACKED { - int32_t ioff : 16; /* relative offset */ - uint32_t bit_or_imm : 5; - uint32_t src : 5; - uint32_t hdr : 6; - } br; - struct PACKED { - uint32_t uoff : 26; /* absolute (unsigned) offset */ - uint32_t hdr : 6; - } call; - struct PACKED { - uint32_t pad : 25; - uint32_t interrupt : 1; /* return from ctxt-switch interrupt handler */ - uint32_t hdr : 6; - } ret; - struct PACKED { - uint32_t pad : 26; - uint32_t hdr : 6; - } waitin; - struct PACKED { - uint32_t pad : 26; - uint32_t opc_r : 6; - }; +struct afuc_instr { + afuc_opc opc; -} afuc_instr; + uint8_t dst; + uint8_t src1; + uint8_t src2; + uint32_t immed; + uint8_t shift; + uint8_t bit; + uint8_t xmov; + uint32_t literal; + int offset; + const char *label; -static inline void -afuc_get_opc(afuc_instr *ai, afuc_opc *opc, bool *rep) -{ - if (ai->opc_r < 0x30) { - *opc = ai->opc_r >> 1; - *rep = ai->opc_r & 0x1; - } else { - *opc = ai->opc_r; - *rep = false; - } -} + bool has_immed : 1; + bool has_shift : 1; + bool has_bit : 1; + bool is_literal : 1; + bool rep : 1; +}; -static inline void -afuc_set_opc(afuc_instr *ai, afuc_opc opc, bool rep) -{ - if (opc < 0x30) { - ai->opc_r = opc << 1; - ai->opc_r |= !!rep; - } else { - ai->opc_r = opc; - } -} - -void print_src(unsigned reg); -void print_dst(unsigned reg); void print_control_reg(uint32_t id); void print_pipe_reg(uint32_t id); diff --git a/src/freedreno/afuc/afuc.xml b/src/freedreno/afuc/afuc.xml new file mode 100644 index 00000000000..8eff4647540 --- /dev/null +++ b/src/freedreno/afuc/afuc.xml @@ -0,0 +1,632 @@ + + + + + + + + Encoding of an afuc instruction. All instructions are 32b. + + + src->xmov + src->dst + src->src1 + src->src2 + src->immed + + src->immed + + + + + + special registers for operands used as a source + + + + + + + + special registers for operands used as a destination + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {SPECIALREG} >= 0x1c + + + + + + {SPECIALREG} + + + + + {REG} + + + + + src + src + + + + + + + + {SPECIALREG} + + + + + {REG} + + + + + src + src + + + + + + + + + src->rep + + + + + Execute eXtra mov's based on $rem + + + + + + + + + {REP}{XMOV}{NAME} {DST}, {SRC1}, {SRC2} + + + xxxx + + + + + 10011 + + + + + {REP}{XMOV}{NAME} {DST}, {SRC1} + + + xxxx + + + + xxxxx + 10011 + + + + + {REP}{NAME} {DST}, {SRC1}, 0x{RIMMED} + + + + + + + + + + {REP}{NAME} {DST}, 0x{IMMED} + + + + + xxxxx + + + + add and write carry flag + 00001 + + + + 00001 + + + + Perform high 32 bits of 64-bit addition, using carry flag written by add + 00010 + + + + 00010 + + + + subtract and write carry flag + 00011 + + + + 00011 + + + + Perform high 32 bits of 64-bit subtraction, using carry flag written by sub + 00100 + + + + 00100 + + + + 00101 + + + + 00101 + + + + + + {SRC1} == 0 + + {REP}{XMOV}mov {DST}, {SRC2} + + + 00110 + + + + 00110 + + + + 00111 + + + + 00111 + + + + 01000 + + + + 01000 + + + + 01001 + + + + 01001 + + + + 0-extending right shift + 01010 + + + + 01010 + + + + sign-extending right shift + 01011 + + + + 01011 + + + + Rotate left (left shift with wraparound) + 01100 + + + + 01100 + + + + Multiply low 8 bits of each source to produce a 16-bit result + 01101 + + + + 01101 + + + + Unsigned minimum + 01110 + + + + 01110 + + + + Unsigned maximum + 01111 + + + + 01111 + + + + + Compare two sources and produce a bitfield: + - 0x00 if src1 > src2 + - 0x2b if src1 == src2 + - 0x1e if src1 < src2 + Often a "branch on bit set/unset" instruction is used on the + result to implement a compare-and-branch macro. + + 10000 + + + + 10000 + + + + Return the most-significant bit of src2, or 0 if src2 == 0 + 10100 + + + + Special move-immediate instruction with a shift + + {SHIFT} == 0 + + {REP}mov {DST}, 0x{RIMMED} + + + + {REP}mov {DST}, 0x{RIMMED} << {SHIFT} + + + + + + 10001 + + + src->shift + + + + + + + + + src->bit + + + + + + + {REP}store {SRC}, [{OFFSET} + 0x{IMMED}], 0x{FLAGS} + + + + Store to memory directly. Mainly used by preemption to avoid + disturbing FIFO state before it is saved and after it is + restored. + + + + + 10100 + + + src->src1 + src->src2 + + + + + Write to a control register. + + {REP}cwrite {SRC}, [{OFFSET} + {CONTROLREG}], 0x{FLAGS} + + + + + 10101 + + + src->src1 + src->src2 + src->immed + + + + + + + Load from memory directly. Mainly used by preemption to avoid + disturbing FIFO state before it is saved and after it is + restored. + + + {REP}load {DST}, [{OFFSET} + 0x{IMMED}], 0x{FLAGS} + + + + + 10110 + + + src->src1 + + + + + Read from a control register. + + {REP}cread {DST}, [{OFFSET} + {CONTROLREG}], 0x{FLAGS} + + + + + + + src->src1 + src->immed + + + + + + 10110 + + + + + + 10111 + + + + + + + + src->offset + src->src1 + + + + + + {NAME} {SRC}, 0x{IMMED}, #{OFFSET} + + + + + + + {NAME} {SRC}, b{BIT}, #{OFFSET} + + + + + src->bit + + + + + Branch if not equal to an immediate. + 110000 + + + + Branch if equal to an immediate. + 110001 + + + + Branch if a bit is not set. + + + + ({BIT} == 0) && ({SRC} == 0) + + + jump #{OFFSET} + + + 110010 + + + + Branch if a bit is set. + 110011 + + + + + {NAME} + + xxxxxxxxxxxxxxxxxxxxxxxxxx + + + + + {NAME} + + 110100 + xxxxxxxxxxxxxxxxxxxxxxxxx + + + + 0 + + + + Return from preemption interrupt handler. + 1 + + + + + call #{TARGET} + + + + 110101 + + + src->literal + + + + + + A special branch instruction that parses the next PM4 packet + header in $data and jumps to the packet handler routine. By + convention the delay slot always contains a "mov $01, $data" + instruction, so that $01 contains the packet header when + processing the next packet. + + + waitin + + 110110 + + + + + Try to leave the preempt handler without jumping back to the + instruction that was interrupted. Jumps to the given destination + if this fails. + + + preemptleave #{TARGET} + + + + 111000 + + + src->literal + + + + + 3 + + + + + Call the zap shader fw to switch into/out of secure mode. Skips + the next two instructions if successful. + + + setsecure $02, #{TARGET} + + + 111011 + + + + + + + {REP}nop + + 00000000000000000000000000 + 00000 + + + + + + + {REP}nop + + 01000000000000000000000000 + 00000 + + + diff --git a/src/freedreno/afuc/asm.c b/src/freedreno/afuc/asm.c index 29989f69ddc..545aac09a54 100644 --- a/src/freedreno/afuc/asm.c +++ b/src/freedreno/afuc/asm.c @@ -34,26 +34,68 @@ #include #include "util/macros.h" +#include "util/log.h" #include "afuc.h" #include "asm.h" #include "parser.h" #include "util.h" +struct encode_state { + unsigned gen; +}; + +static afuc_opc +__instruction_case(struct encode_state *s, struct afuc_instr *instr) +{ + switch (instr->opc) { +#define ALU(name) \ + case OPC_##name: \ + if (instr->has_immed) \ + return OPC_##name##I; \ + break; + + ALU(ADD) + ALU(ADDHI) + ALU(SUB) + ALU(SUBHI) + ALU(AND) + ALU(OR) + ALU(XOR) + ALU(NOT) + ALU(SHL) + ALU(USHR) + ALU(ISHR) + ALU(ROT) + ALU(MUL8) + ALU(MIN) + ALU(MAX) + ALU(CMP) +#undef ALU + + default: + break; + } + + return instr->opc; +} + +#include "encode.h" + int gpuver; /* bit lame to hard-code max but fw sizes are small */ -static struct asm_instruction instructions[0x2000]; +static struct afuc_instr instructions[0x2000]; static unsigned num_instructions; static struct asm_label labels[0x512]; static unsigned num_labels; -struct asm_instruction * -next_instr(int tok) +struct afuc_instr * +next_instr(afuc_opc opc) { - struct asm_instruction *ai = &instructions[num_instructions++]; + struct afuc_instr *ai = &instructions[num_instructions++]; assert(num_instructions < ARRAY_SIZE(instructions)); - ai->tok = tok; + ai->opc = opc; return ai; } @@ -85,245 +127,79 @@ resolve_label(const char *str) exit(2); } -static afuc_opc -tok2alu(int tok) -{ - switch (tok) { - case T_OP_ADD: - return OPC_ADD; - case T_OP_ADDHI: - return OPC_ADDHI; - case T_OP_SUB: - return OPC_SUB; - case T_OP_SUBHI: - return OPC_SUBHI; - case T_OP_AND: - return OPC_AND; - case T_OP_OR: - return OPC_OR; - case T_OP_XOR: - return OPC_XOR; - case T_OP_NOT: - return OPC_NOT; - case T_OP_SHL: - return OPC_SHL; - case T_OP_USHR: - return OPC_USHR; - case T_OP_ISHR: - return OPC_ISHR; - case T_OP_ROT: - return OPC_ROT; - case T_OP_MUL8: - return OPC_MUL8; - case T_OP_MIN: - return OPC_MIN; - case T_OP_MAX: - return OPC_MAX; - case T_OP_CMP: - return OPC_CMP; - case T_OP_MSB: - return OPC_MSB; - default: - assert(0); - return -1; - } -} - static void emit_instructions(int outfd) { int i; + struct encode_state s = { + .gen = gpuver, + }; + /* there is an extra 0x00000000 which kernel strips off.. we could * perhaps use it for versioning. */ i = 0; write(outfd, &i, 4); + /* Expand some meta opcodes, and resolve branch targets */ for (i = 0; i < num_instructions; i++) { - struct asm_instruction *ai = &instructions[i]; - afuc_instr instr = {0}; - afuc_opc opc; + struct afuc_instr *ai = &instructions[i]; + + switch (ai->opc) { + case OPC_BREQ: + ai->offset = resolve_label(ai->label) - i; + if (ai->has_bit) + ai->opc = OPC_BREQB; + else + ai->opc = OPC_BREQI; + break; + + case OPC_BRNE: + ai->offset = resolve_label(ai->label) - i; + if (ai->has_bit) + ai->opc = OPC_BRNEB; + else + ai->opc = OPC_BRNEI; + break; + + case OPC_JUMP: + ai->offset = resolve_label(ai->label) - i; + ai->opc = OPC_BRNEB; + ai->src1 = 0; + ai->bit = 0; + break; + + case OPC_CALL: + case OPC_PREEMPTLEAVE: + ai->literal = resolve_label(ai->label); + break; + + case OPC_MOVI: + if (ai->label) + ai->immed = resolve_label(ai->label); + break; + + default: + break; + } /* special case, 2nd dword is patched up w/ # of instructions * (ie. offset of jmptbl) */ if (i == 1) { - assert(ai->is_literal); + assert(ai->opc == OPC_RAW_LITERAL); ai->literal &= ~0xffff; ai->literal |= num_instructions; } - if (ai->is_literal) { + if (ai->opc == OPC_RAW_LITERAL) { write(outfd, &ai->literal, 4); continue; } - switch (ai->tok) { - case T_OP_NOP: - opc = OPC_NOP; - if (gpuver >= 6) - instr.pad = 0x1000000; - break; - case T_OP_ADD: - case T_OP_ADDHI: - case T_OP_SUB: - case T_OP_SUBHI: - case T_OP_AND: - case T_OP_OR: - case T_OP_XOR: - case T_OP_NOT: - case T_OP_SHL: - case T_OP_USHR: - case T_OP_ISHR: - case T_OP_ROT: - case T_OP_MUL8: - case T_OP_MIN: - case T_OP_MAX: - case T_OP_CMP: - case T_OP_MSB: - if (ai->has_immed) { - /* MSB overlaps with STORE */ - assert(ai->tok != T_OP_MSB); - if (ai->xmov) { - fprintf(stderr, - "ALU instruction cannot have immediate and xmov\n"); - exit(1); - } - opc = tok2alu(ai->tok); - instr.alui.dst = ai->dst; - instr.alui.src = ai->src1; - instr.alui.uimm = ai->immed; - } else { - opc = OPC_ALU; - instr.alu.dst = ai->dst; - instr.alu.src1 = ai->src1; - instr.alu.src2 = ai->src2; - instr.alu.xmov = ai->xmov; - instr.alu.alu = tok2alu(ai->tok); - } - break; - case T_OP_MOV: - /* move can either be encoded as movi (ie. move w/ immed) or - * an alu instruction - */ - if ((ai->has_immed || ai->label) && ai->xmov) { - fprintf(stderr, "ALU instruction cannot have immediate and xmov\n"); - exit(1); - } - if (ai->has_immed) { - opc = OPC_MOVI; - instr.movi.dst = ai->dst; - instr.movi.uimm = ai->immed; - instr.movi.shift = ai->shift; - } else if (ai->label) { - /* mov w/ a label is just an alias for an immediate, this - * is useful to load the address of a constant table into - * a register: - */ - opc = OPC_MOVI; - instr.movi.dst = ai->dst; - instr.movi.uimm = resolve_label(ai->label); - instr.movi.shift = ai->shift; - } else { - /* encode as: or $dst, $00, $src */ - opc = OPC_ALU; - instr.alu.dst = ai->dst; - instr.alu.src1 = 0x00; /* $00 reads-back 0 */ - instr.alu.src2 = ai->src1; - instr.alu.xmov = ai->xmov; - instr.alu.alu = OPC_OR; - } - break; - case T_OP_CWRITE: - case T_OP_CREAD: - case T_OP_STORE: - case T_OP_LOAD: - if (gpuver >= 6) { - if (ai->tok == T_OP_CWRITE) { - opc = OPC_CWRITE6; - } else if (ai->tok == T_OP_CREAD) { - opc = OPC_CREAD6; - } else if (ai->tok == T_OP_STORE) { - opc = OPC_STORE6; - } else if (ai->tok == T_OP_LOAD) { - opc = OPC_LOAD6; - } else { - unreachable(""); - } - } else { - if (ai->tok == T_OP_CWRITE) { - opc = OPC_CWRITE5; - } else if (ai->tok == T_OP_CREAD) { - opc = OPC_CREAD5; - } else if (ai->tok == T_OP_STORE || ai->tok == T_OP_LOAD) { - fprintf(stderr, "load and store do not exist on a5xx\n"); - exit(1); - } else { - unreachable(""); - } - } - instr.control.src1 = ai->src1; - instr.control.src2 = ai->src2; - instr.control.flags = ai->bit; - instr.control.uimm = ai->immed; - break; - case T_OP_BRNE: - case T_OP_BREQ: - if (ai->has_immed) { - opc = (ai->tok == T_OP_BRNE) ? OPC_BRNEI : OPC_BREQI; - instr.br.bit_or_imm = ai->immed; - } else { - opc = (ai->tok == T_OP_BRNE) ? OPC_BRNEB : OPC_BREQB; - instr.br.bit_or_imm = ai->bit; - } - instr.br.src = ai->src1; - instr.br.ioff = resolve_label(ai->label) - i; - break; - case T_OP_RET: - opc = OPC_RET; - break; - case T_OP_IRET: - opc = OPC_RET; - instr.ret.interrupt = 1; - break; - case T_OP_CALL: - opc = OPC_CALL; - instr.call.uoff = resolve_label(ai->label); - break; - case T_OP_PREEMPTLEAVE: - opc = OPC_PREEMPTLEAVE6; - instr.call.uoff = resolve_label(ai->label); - break; - case T_OP_SETSECURE: - opc = OPC_SETSECURE; - if (resolve_label(ai->label) != i + 3) { - fprintf(stderr, "jump label %s is incorrect for setsecure\n", - ai->label); - exit(1); - } - if (ai->src1 != 0x2) { - fprintf(stderr, "source for setsecure must be $02\n"); - exit(1); - } - break; - case T_OP_JUMP: - /* encode jump as: brne $00, b0, #label */ - opc = OPC_BRNEB; - instr.br.bit_or_imm = 0; - instr.br.src = 0x00; /* $00 reads-back 0.. compare to 0 */ - instr.br.ioff = resolve_label(ai->label) - i; - break; - case T_OP_WAITIN: - opc = OPC_WIN; - break; - default: - unreachable(""); - } - - afuc_set_opc(&instr, opc, ai->rep); - - write(outfd, &instr, 4); + uint32_t encoded = bitmask_to_uint64_t(encode__instruction(&s, NULL, ai)); + write(outfd, &encoded, 4); } } diff --git a/src/freedreno/afuc/asm.h b/src/freedreno/afuc/asm.h index da75c311e68..fa899fe531c 100644 --- a/src/freedreno/afuc/asm.h +++ b/src/freedreno/afuc/asm.h @@ -30,37 +30,12 @@ extern int gpuver; -/** - * Intermediate representation for an instruction, before final encoding. - * This mostly exists because we need to resolve label offset's in a 2nd - * pass, but also so that parser.y doesn't really need to care so much - * about the different encodings for 2src regs vs 1src+immed, or mnemonics - */ -struct asm_instruction { - int tok; - int dst; - int src1; - int src2; - int immed; - int shift; - int bit; - int xmov; - uint32_t literal; - const char *label; - - bool has_immed : 1; - bool has_shift : 1; - bool has_bit : 1; - bool is_literal : 1; - bool rep : 1; -}; - struct asm_label { unsigned offset; const char *label; }; -struct asm_instruction *next_instr(int tok); +struct afuc_instr *next_instr(afuc_opc opc); void decl_label(const char *str); static inline uint32_t diff --git a/src/freedreno/afuc/disasm.c b/src/freedreno/afuc/disasm.c index 9b19645b606..adfb21d84c7 100644 --- a/src/freedreno/afuc/disasm.c +++ b/src/freedreno/afuc/disasm.c @@ -35,13 +35,15 @@ #include "util/os_file.h" +#include "compiler/isaspec/isaspec.h" + #include "freedreno_pm4.h" #include "afuc.h" #include "util.h" #include "emu.h" -static int gpuver; +int gpuver; /* non-verbose mode should output something suitable to feed back into * assembler.. verbose mode has additional output useful for debugging @@ -52,213 +54,26 @@ static bool verbose = false; /* emulator mode: */ static bool emulator = false; -static void -print_gpu_reg(uint32_t regbase) -{ - if (regbase < 0x100) - return; - - char *name = afuc_gpu_reg_name(regbase); - if (name) { - printf("\t; %s", name); - free(name); - } -} - #define printerr(fmt, ...) afuc_printc(AFUC_ERR, fmt, ##__VA_ARGS__) #define printlbl(fmt, ...) afuc_printc(AFUC_LBL, fmt, ##__VA_ARGS__) -void -print_src(unsigned reg) -{ - if (reg == REG_REM) - printf("$rem"); /* remainding dwords in packet */ - else if (reg == REG_MEMDATA) - printf("$memdata"); - else if (reg == REG_REGDATA) - printf("$regdata"); - else if (reg == REG_DATA) - printf("$data"); - else - printf("$%02x", reg); -} - -void -print_dst(unsigned reg) -{ - if (reg == REG_REM) - printf("$rem"); /* remainding dwords in packet */ - else if (reg == REG_ADDR) - printf("$addr"); - else if (reg == REG_USRADDR) - printf("$usraddr"); - else if (reg == REG_DATA) - printf("$data"); - else - printf("$%02x", reg); -} - -static void -print_alu_name(afuc_opc opc, uint32_t instr) -{ - if (opc == OPC_ADD) { - printf("add "); - } else if (opc == OPC_ADDHI) { - printf("addhi "); - } else if (opc == OPC_SUB) { - printf("sub "); - } else if (opc == OPC_SUBHI) { - printf("subhi "); - } else if (opc == OPC_AND) { - printf("and "); - } else if (opc == OPC_OR) { - printf("or "); - } else if (opc == OPC_XOR) { - printf("xor "); - } else if (opc == OPC_NOT) { - printf("not "); - } else if (opc == OPC_SHL) { - printf("shl "); - } else if (opc == OPC_USHR) { - printf("ushr "); - } else if (opc == OPC_ISHR) { - printf("ishr "); - } else if (opc == OPC_ROT) { - printf("rot "); - } else if (opc == OPC_MUL8) { - printf("mul8 "); - } else if (opc == OPC_MIN) { - printf("min "); - } else if (opc == OPC_MAX) { - printf("max "); - } else if (opc == OPC_CMP) { - printf("cmp "); - } else if (opc == OPC_MSB) { - printf("msb "); - } else { - printerr("[%08x]", instr); - printf(" ; alu%02x ", opc); - } -} - static const char * getpm4(uint32_t id) { return afuc_pm_id_name(id); } -static struct { - uint32_t offset; - uint32_t num_jump_labels; - uint32_t jump_labels[256]; -} jump_labels[1024]; -int num_jump_labels; - static void -add_jump_table_entry(uint32_t n, uint32_t offset) +print_gpu_reg(FILE *out, uint32_t regbase) { - int i; - - if (n > 128) /* can't possibly be a PM4 PKT3.. */ + if (regbase < 0x100) return; - for (i = 0; i < num_jump_labels; i++) - if (jump_labels[i].offset == offset) - goto add_label; - - num_jump_labels = i + 1; - jump_labels[i].offset = offset; - jump_labels[i].num_jump_labels = 0; - -add_label: - jump_labels[i].jump_labels[jump_labels[i].num_jump_labels++] = n; - assert(jump_labels[i].num_jump_labels < 256); -} - -static int -get_jump_table_entry(uint32_t offset) -{ - int i; - - for (i = 0; i < num_jump_labels; i++) - if (jump_labels[i].offset == offset) - return i; - - return -1; -} - -static uint32_t label_offsets[0x512]; -static int num_label_offsets; - -static int -label_idx(uint32_t offset, bool create) -{ - int i; - for (i = 0; i < num_label_offsets; i++) - if (offset == label_offsets[i]) - return i; - if (!create) - return -1; - label_offsets[i] = offset; - num_label_offsets = i + 1; - return i; -} - -static const char * -label_name(uint32_t offset, bool allow_jt) -{ - static char name[12]; - int lidx; - - if (allow_jt) { - lidx = get_jump_table_entry(offset); - if (lidx >= 0) { - int j; - for (j = 0; j < jump_labels[lidx].num_jump_labels; j++) { - uint32_t jump_label = jump_labels[lidx].jump_labels[j]; - const char *str = getpm4(jump_label); - if (str) - return str; - } - // if we don't find anything w/ known name, maybe we should - // return UNKN%d to at least make it clear that this is some - // sort of jump-table entry? - } + char *name = afuc_gpu_reg_name(regbase); + if (name) { + fprintf(out, "\t; %s", name); + free(name); } - - lidx = label_idx(offset, false); - if (lidx < 0) - return NULL; - sprintf(name, "l%03d", lidx); - return name; -} - -static uint32_t fxn_offsets[0x512]; -static int num_fxn_offsets; - -static int -fxn_idx(uint32_t offset, bool create) -{ - int i; - for (i = 0; i < num_fxn_offsets; i++) - if (offset == fxn_offsets[i]) - return i; - if (!create) - return -1; - fxn_offsets[i] = offset; - num_fxn_offsets = i + 1; - return i; -} - -static const char * -fxn_name(uint32_t offset) -{ - static char name[14]; - int fidx = fxn_idx(offset, false); - if (fidx < 0) - return NULL; - sprintf(name, "fxn%02d", fidx); - return name; } void @@ -285,476 +100,125 @@ print_pipe_reg(uint32_t id) } } +struct decode_state { + uint32_t immed; + uint8_t shift; + bool has_immed; + bool dst_is_addr; +}; + static void -disasm_instr(uint32_t *instrs, unsigned pc) +field_print_cb(struct isa_print_state *state, const char *field_name, uint64_t val) { - int jump_label_idx; - afuc_instr *instr = (void *)&instrs[pc]; - const char *fname, *lname; - afuc_opc opc; - bool rep; - - afuc_get_opc(instr, &opc, &rep); - - lname = label_name(pc, false); - fname = fxn_name(pc); - jump_label_idx = get_jump_table_entry(pc); - - if (jump_label_idx >= 0) { - int j; - printf("\n"); - for (j = 0; j < jump_labels[jump_label_idx].num_jump_labels; j++) { - uint32_t jump_label = jump_labels[jump_label_idx].jump_labels[j]; - const char *name = getpm4(jump_label); - if (name) { - printlbl("%s", name); - } else { - printlbl("UNKN%d", jump_label); - } - printf(":\n"); - } - } - - if (fname) { - printlbl("%s", fname); - printf(":\n"); - } - - if (lname) { - printlbl(" %s", lname); - printf(":"); - } else { - printf(" "); - } - - if (verbose) { - printf("\t%04x: %08x ", pc, instrs[pc]); - } else { - printf(" "); - } - - switch (opc) { - case OPC_NOP: { - /* a6xx changed the default immediate, and apparently 0 - * is illegal now. - */ - const uint32_t nop = gpuver >= 6 ? 0x1000000 : 0x0; - if (instrs[pc] != nop) { - printerr("[%08x]", instrs[pc]); - printf(" ; "); - } - if (rep) - printf("(rep)"); - printf("nop"); - print_gpu_reg(instrs[pc]); - - break; - } - case OPC_ADD: - case OPC_ADDHI: - case OPC_SUB: - case OPC_SUBHI: - case OPC_AND: - case OPC_OR: - case OPC_XOR: - case OPC_NOT: - case OPC_SHL: - case OPC_USHR: - case OPC_ISHR: - case OPC_ROT: - case OPC_MUL8: - case OPC_MIN: - case OPC_MAX: - case OPC_CMP: { - bool src1 = true; - - if (opc == OPC_NOT) - src1 = false; - - if (rep) - printf("(rep)"); - - print_alu_name(opc, instrs[pc]); - print_dst(instr->alui.dst); - printf(", "); - if (src1) { - print_src(instr->alui.src); - printf(", "); - } - printf("0x%04x", instr->alui.uimm); - print_gpu_reg(instr->alui.uimm); - - /* print out unexpected bits: */ - if (verbose) { - if (instr->alui.src && !src1) - printerr(" (src=%02x)", instr->alui.src); - } - - break; - } - case OPC_MOVI: { - if (rep) - printf("(rep)"); - printf("mov "); - print_dst(instr->movi.dst); - printf(", 0x%04x", instr->movi.uimm); - if (instr->movi.shift) - printf(" << %u", instr->movi.shift); - - if ((instr->movi.dst == REG_ADDR) && (instr->movi.shift >= 16)) { - uint32_t val = (uint32_t)instr->movi.uimm << (uint32_t)instr->movi.shift; - val &= ~0x40000; /* b18 seems to be a flag */ - - if ((val & 0x00ffffff) == 0) { - printf("\t; "); - print_pipe_reg(val >> 24); - break; - } - } - /* using mov w/ << 16 is popular way to construct a pkt7 - * header to send (for ex, from PFP to ME), so check that - * case first - */ - if ((instr->movi.shift == 16) && - ((instr->movi.uimm & 0xff00) == 0x7000)) { - unsigned opc, p; - - opc = instr->movi.uimm & 0x7f; - p = pm4_odd_parity_bit(opc); - - /* So, you'd think that checking the parity bit would be - * a good way to rule out false positives, but seems like - * ME doesn't really care.. at least it would filter out - * things that look like actual legit packets between - * PFP and ME.. - */ - if (1 || p == ((instr->movi.uimm >> 7) & 0x1)) { - const char *name = getpm4(opc); - printf("\t; "); - if (name) - printlbl("%s", name); - else - printlbl("UNKN%u", opc); - break; - } - } - - print_gpu_reg((uint32_t)instr->movi.uimm << (uint32_t)instr->movi.shift); - - break; - } - case OPC_ALU: { - bool src1 = true; - - if (instr->alu.alu == OPC_NOT || instr->alu.alu == OPC_MSB) - src1 = false; - - if (instr->alu.pad) - printf("[%08x] ; ", instrs[pc]); - - if (rep) - printf("(rep)"); - if (instr->alu.xmov) - printf("(xmov%d)", instr->alu.xmov); - - /* special case mnemonics: - * reading $00 seems to always yield zero, and so: - * or $dst, $00, $src -> mov $dst, $src - * Maybe add one for negate too, ie. - * sub $dst, $00, $src ??? - */ - if ((instr->alu.alu == OPC_OR) && !instr->alu.src1) { - printf("mov "); - src1 = false; + if (!strcmp(field_name, "CONTROLREG")) { + char *name = afuc_control_reg_name(val); + if (name) { + isa_print(state, "@%s", name); + free(name); } else { - print_alu_name(instr->alu.alu, instrs[pc]); + isa_print(state, "0x%03x", (unsigned)val); } - - print_dst(instr->alu.dst); - if (src1) { - printf(", "); - print_src(instr->alu.src1); - } - printf(", "); - print_src(instr->alu.src2); - - /* print out unexpected bits: */ - if (verbose) { - if (instr->alu.pad) - printerr(" (pad=%01x)", instr->alu.pad); - if (instr->alu.src1 && !src1) - printerr(" (src1=%02x)", instr->alu.src1); - } - - /* xmov is a modifier that makes the processor execute up to 3 - * extra mov's after the current instruction. Given an ALU - * instruction: - * - * (xmovN) alu $dst, $src1, $src2 - * - * In all of the uses in the firmware blob, $dst and $src2 are one - * of the "special" registers $data, $addr, $addr2. I've observed - * that if $dst isn't "special" then it's replaced with $00 - * instead of $data, but I haven't checked what happens if $src2 - * isn't "special". Anyway, in the usual case, the HW produces a - * count M = min(N, $rem) and then does the following: - * - * M = 1: - * mov $data, $src2 - * - * M = 2: - * mov $data, $src2 - * mov $data, $src2 - * - * M = 3: - * mov $data, $src2 - * mov $dst, $src2 (special case for CP_CONTEXT_REG_BUNCH) - * mov $data, $src2 - * - * It seems to be frequently used in combination with (rep) to - * provide a kind of hardware-based loop unrolling, and there's - * even a special case in the ISA to be able to do this with - * CP_CONTEXT_REG_BUNCH. However (rep) isn't required. - * - * This dumps the expected extra instructions, assuming that $rem - * isn't too small. - */ - if (verbose && instr->alu.xmov) { - for (int i = 0; i < instr->alu.xmov; i++) { - printf("\n ; mov "); - if (instr->alu.dst < 0x1d) - printf("$00"); - else if (instr->alu.xmov == 3 && i == 1) - print_dst(instr->alu.dst); - else - printf("$data"); - printf(", "); - print_src(instr->alu.src2); - } - } - - break; } - case OPC_CWRITE6: - case OPC_CREAD6: - case OPC_STORE6: - case OPC_LOAD6: { - if (rep) - printf("(rep)"); - - bool is_control_reg = true; - bool is_store = true; - if (gpuver >= 6) { - switch (opc) { - case OPC_CWRITE6: - printf("cwrite "); - break; - case OPC_CREAD6: - is_store = false; - printf("cread "); - break; - case OPC_STORE6: - is_control_reg = false; - printf("store "); - break; - case OPC_LOAD6: - is_control_reg = false; - is_store = false; - printf("load "); - break; - default: - assert(!"unreachable"); - } - } else { - switch (opc) { - case OPC_CWRITE5: - printf("cwrite "); - break; - case OPC_CREAD5: - is_store = false; - printf("cread "); - break; - default: - fprintf(stderr, "A6xx control opcode on A5xx?\n"); - exit(1); - } - } - - if (is_store) - print_src(instr->control.src1); - else - print_dst(instr->control.src1); - printf(", ["); - print_src(instr->control.src2); - printf(" + "); - if (is_control_reg && instr->control.flags != 0x4) - print_control_reg(instr->control.uimm); - else - printf("0x%03x", instr->control.uimm); - printf("], 0x%x", instr->control.flags); - break; - } - case OPC_BRNEI: - case OPC_BREQI: - case OPC_BRNEB: - case OPC_BREQB: { - unsigned off = pc + instr->br.ioff; - - assert(!rep); - - /* Since $00 reads back zero, it can be used as src for - * unconditional branches. (This only really makes sense - * for the BREQB.. or possible BRNEI if imm==0.) - * - * If bit=0 then branch is taken if *all* bits are zero. - * Otherwise it is taken if bit (bit-1) is clear. - * - * Note the instruction after a jump/branch is executed - * regardless of whether branch is taken, so use nop or - * take that into account in code. - */ - if (instr->br.src || (opc != OPC_BRNEB)) { - bool immed = false; - - if (opc == OPC_BRNEI) { - printf("brne "); - immed = true; - } else if (opc == OPC_BREQI) { - printf("breq "); - immed = true; - } else if (opc == OPC_BRNEB) { - printf("brne "); - } else if (opc == OPC_BREQB) { - printf("breq "); - } - print_src(instr->br.src); - if (immed) { - printf(", 0x%x,", instr->br.bit_or_imm); - } else { - printf(", b%u,", instr->br.bit_or_imm); - } - } else { - printf("jump"); - if (verbose && instr->br.bit_or_imm) { - printerr(" (src=%03x, bit=%03x) ", instr->br.src, - instr->br.bit_or_imm); - } - } - - printf(" #"); - printlbl("%s", label_name(off, true)); - if (verbose) - printf(" (#%d, %04x)", instr->br.ioff, off); - break; - } - case OPC_CALL: - assert(!rep); - printf("call #"); - printlbl("%s", fxn_name(instr->call.uoff)); - if (verbose) { - printf(" (%04x)", instr->call.uoff); - if (instr->br.bit_or_imm || instr->br.src) { - printerr(" (src=%03x, bit=%03x) ", instr->br.src, - instr->br.bit_or_imm); - } - } - break; - case OPC_RET: - assert(!rep); - if (instr->ret.pad) - printf("[%08x] ; ", instrs[pc]); - if (instr->ret.interrupt) - printf("iret"); - else - printf("ret"); - break; - case OPC_WIN: - assert(!rep); - if (instr->waitin.pad) - printf("[%08x] ; ", instrs[pc]); - printf("waitin"); - if (verbose && instr->waitin.pad) - printerr(" (pad=%x)", instr->waitin.pad); - break; - case OPC_PREEMPTLEAVE6: - if (gpuver < 6) { - printf("[%08x] ; op38", instrs[pc]); - } else { - printf("preemptleave #"); - printlbl("%s", label_name(instr->call.uoff, true)); - } - break; - case OPC_SETSECURE: - /* Note: This seems to implicitly read the secure/not-secure state - * to set from the low bit of $02, and implicitly jumps to pc + 3 - * (i.e. skipping the next two instructions) if it succeeds. We - * print these implicit parameters to make reading the disassembly - * easier. - */ - if (instr->pad) - printf("[%08x] ; ", instrs[pc]); - printf("setsecure $02, #"); - printlbl("%s", label_name(pc + 3, true)); - break; - default: - printerr("[%08x]", instrs[pc]); - printf(" ; op%02x ", opc); - print_dst(instr->alui.dst); - printf(", "); - print_src(instr->alui.src); - print_gpu_reg(instrs[pc] & 0xffff); - break; - } - printf("\n"); } static void -setup_packet_table(uint32_t *jmptbl, uint32_t sizedwords) +pre_instr_cb(void *data, unsigned n, void *instr) { - num_jump_labels = 0; + struct decode_state *state = data; + state->has_immed = state->dst_is_addr = false; + state->shift = 0; + + if (verbose) + printf("\t%04x: %08x ", n, *(uint32_t *)instr); +} + +static void +field_cb(void *data, const char *field_name, struct isa_decode_value *val) +{ + struct decode_state *state = data; + + if (!strcmp(field_name, "RIMMED")) { + state->immed = val->num; + state->has_immed = true; + } + + if (!strcmp(field_name, "SHIFT")) { + state->shift = val->num; + } + + if (!strcmp(field_name, "DST")) { + if (val->num == REG_ADDR) + state->dst_is_addr = true; + } +} + +static void +post_instr_cb(void *data, unsigned n, void *instr) +{ + struct decode_state *state = data; + + if (state->has_immed) { + uint32_t immed = state->immed << state->shift; + if (state->dst_is_addr && state->shift >= 16) { + immed &= ~0x40000; /* b18 disables auto-increment of address */ + if ((immed & 0x00ffffff) == 0) { + printf("\t; "); + print_pipe_reg(immed >> 24); + } + } else { + print_gpu_reg(stdout, immed); + } + } +} + +/* Assume that instructions that don't match are raw data */ +static void +no_match(FILE *out, const BITSET_WORD *bitset, size_t size) +{ + fprintf(out, "[%08x]", bitset[0]); + print_gpu_reg(out, bitset[0]); + fprintf(out, "\n"); +} + +static void +get_decode_options(struct isa_decode_options *options) +{ + *options = (struct isa_decode_options) { + .gpu_id = gpuver, + .branch_labels = true, + .field_cb = field_cb, + .field_print_cb = field_print_cb, + .pre_instr_cb = pre_instr_cb, + .post_instr_cb = post_instr_cb, + .no_match_cb = no_match, + }; +} + +static void +disasm_instr(struct isa_decode_options *options, uint32_t *instrs, unsigned pc) +{ + isa_disasm(&instrs[pc], 4, stdout, options); +} + +static void +setup_packet_table(struct isa_decode_options *options, + uint32_t *jmptbl, uint32_t sizedwords) +{ + struct isa_entrypoint *entrypoints = malloc(sizedwords * sizeof(struct isa_entrypoint)); for (unsigned i = 0; i < sizedwords; i++) { - unsigned offset = jmptbl[i]; + entrypoints[i].offset = jmptbl[i]; unsigned n = i; // + CP_NOP; - add_jump_table_entry(n, offset); - } -} - -static void -setup_labels(uint32_t *instrs, uint32_t sizedwords) -{ - afuc_opc opc; - bool rep; - - num_label_offsets = 0; - - for (unsigned i = 0; i < sizedwords; i++) { - afuc_instr *instr = (void *)&instrs[i]; - - afuc_get_opc(instr, &opc, &rep); - - switch (opc) { - case OPC_BRNEI: - case OPC_BREQI: - case OPC_BRNEB: - case OPC_BREQB: - label_idx(i + instr->br.ioff, true); - break; - case OPC_PREEMPTLEAVE6: - if (gpuver >= 6) - label_idx(instr->call.uoff, true); - break; - case OPC_CALL: - fxn_idx(instr->call.uoff, true); - break; - case OPC_SETSECURE: - /* this implicitly jumps to pc + 3 if successful */ - label_idx(i + 3, true); - break; - default: - break; + entrypoints[i].name = afuc_pm_id_name(n); + if (!entrypoints[i].name) { + char *name; + asprintf(&name, "UNKN%d", n); + entrypoints[i].name = name; } } + + options->entrypoints = entrypoints; + options->entrypoint_count = sizedwords; } static void @@ -768,9 +232,14 @@ disasm(struct emu *emu) emu_init(emu); + struct isa_decode_options options; + struct decode_state state; + get_decode_options(&options); + options.cbdata = &state; + #ifdef BOOTSTRAP_DEBUG while (true) { - disasm_instr(emu->instrs, emu->gpr_regs.pc); + disasm_instr(&options, emu->instrs, emu->gpr_regs.pc); emu_step(emu); } #endif @@ -785,8 +254,7 @@ disasm(struct emu *emu) sizedwords = lpac_offset; } - setup_packet_table(emu->jmptbl, ARRAY_SIZE(emu->jmptbl)); - setup_labels(emu->instrs, emu->sizedwords); + setup_packet_table(&options, emu->jmptbl, ARRAY_SIZE(emu->jmptbl)); /* TODO add option to emulate LPAC SQE instead: */ if (emulator) { @@ -795,15 +263,13 @@ disasm(struct emu *emu) emu_init(emu); while (true) { - disasm_instr(emu->instrs, emu->gpr_regs.pc); + disasm_instr(&options, emu->instrs, emu->gpr_regs.pc); emu_step(emu); } } /* print instructions: */ - for (int i = 0; i < sizedwords; i++) { - disasm_instr(emu->instrs, i); - } + isa_disasm(emu->instrs, sizedwords * 4, stdout, &options); if (!lpac_offset) return; @@ -821,23 +287,20 @@ disasm(struct emu *emu) emu_init(emu); emu_run_bootstrap(emu); - setup_packet_table(emu->jmptbl, ARRAY_SIZE(emu->jmptbl)); - setup_labels(emu->instrs, emu->sizedwords); + setup_packet_table(&options, emu->jmptbl, ARRAY_SIZE(emu->jmptbl)); - /* print instructions: */ - for (int i = 0; i < emu->sizedwords; i++) { - disasm_instr(emu->instrs, i); - } + isa_disasm(emu->instrs, emu->sizedwords * 4, stdout, &options); } static void disasm_raw(uint32_t *instrs, int sizedwords) { - setup_labels(instrs, sizedwords); + struct isa_decode_options options; + struct decode_state state; + get_decode_options(&options); + options.cbdata = &state; - for (int i = 0; i < sizedwords; i++) { - disasm_instr(instrs, i); - } + isa_disasm(instrs, sizedwords * 4, stdout, &options); } static void @@ -848,18 +311,16 @@ disasm_legacy(uint32_t *buf, int sizedwords) uint32_t *jmptbl = &buf[jmptbl_start]; int i; - /* parse jumptable: */ - setup_packet_table(jmptbl, 0x80); + struct isa_decode_options options; + struct decode_state state; + get_decode_options(&options); + options.cbdata = &state; - /* do a pre-pass to find instructions that are potential branch targets, - * and add labels for them: - */ - setup_labels(instrs, jmptbl_start); + /* parse jumptable: */ + setup_packet_table(&options, jmptbl, 0x80); /* print instructions: */ - for (i = 0; i < jmptbl_start; i++) { - disasm_instr(instrs, i); - } + isa_disasm(instrs, sizedwords * 4, stdout, &options); /* print jumptable: */ if (verbose) { diff --git a/src/freedreno/afuc/emu-ui.c b/src/freedreno/afuc/emu-ui.c index f1f7aeeb8fe..240aac31685 100644 --- a/src/freedreno/afuc/emu-ui.c +++ b/src/freedreno/afuc/emu-ui.c @@ -142,6 +142,21 @@ read_one_value(const char **val) return 0; } +static void +print_dst(unsigned reg) +{ + if (reg == REG_REM) + printf("$rem"); /* remainding dwords in packet */ + else if (reg == REG_ADDR) + printf("$addr"); + else if (reg == REG_USRADDR) + printf("$usraddr"); + else if (reg == REG_DATA) + printf("$data"); + else + printf("$%02x", reg); +} + static void dump_gpr_register(struct emu *emu, unsigned n) { diff --git a/src/freedreno/afuc/emu.c b/src/freedreno/afuc/emu.c index 959761d3d78..dd40d5f9419 100644 --- a/src/freedreno/afuc/emu.c +++ b/src/freedreno/afuc/emu.c @@ -34,9 +34,13 @@ #include "freedreno_pm4.h" +#include "isaspec.h" + #include "emu.h" #include "util.h" +extern int gpuver; + #define rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) #define rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r)))) @@ -120,37 +124,23 @@ load_store_addr(struct emu *emu, unsigned gpr) } static void -emu_instr(struct emu *emu, afuc_instr *instr) +emu_instr(struct emu *emu, struct afuc_instr *instr) { uint32_t rem = emu_get_gpr_reg(emu, REG_REM); - afuc_opc opc; - bool rep; - afuc_get_opc(instr, &opc, &rep); - - switch (opc) { + switch (instr->opc) { case OPC_NOP: break; + case OPC_MSB: case OPC_ADD ... OPC_CMP: { - uint32_t val = emu_alu(emu, opc, - emu_get_gpr_reg(emu, instr->alui.src), - instr->alui.uimm); - emu_set_gpr_reg(emu, instr->alui.dst, val); - break; - } - case OPC_MOVI: { - uint32_t val = instr->movi.uimm << instr->movi.shift; - emu_set_gpr_reg(emu, instr->movi.dst, val); - break; - } - case OPC_ALU: { - uint32_t val = emu_alu(emu, instr->alu.alu, - emu_get_gpr_reg(emu, instr->alu.src1), - emu_get_gpr_reg(emu, instr->alu.src2)); - emu_set_gpr_reg(emu, instr->alu.dst, val); + uint32_t val = emu_alu(emu, instr->opc, + emu_get_gpr_reg(emu, instr->src1), + instr->has_immed ? instr->immed : + emu_get_gpr_reg(emu, instr->src2)); + emu_set_gpr_reg(emu, instr->dst, val); - if (instr->alu.xmov) { - unsigned m = MIN2(instr->alu.xmov, rem); + if (instr->xmov) { + unsigned m = MIN2(instr->xmov, rem); assert(m <= 3); @@ -158,108 +148,113 @@ emu_instr(struct emu *emu, afuc_instr *instr) emu_set_gpr_reg(emu, REG_REM, --rem); emu_dump_state_change(emu); emu_set_gpr_reg(emu, REG_DATA, - emu_get_gpr_reg(emu, instr->alu.src2)); + emu_get_gpr_reg(emu, instr->src2)); } else if (m == 2) { emu_set_gpr_reg(emu, REG_REM, --rem); emu_dump_state_change(emu); emu_set_gpr_reg(emu, REG_DATA, - emu_get_gpr_reg(emu, instr->alu.src2)); + emu_get_gpr_reg(emu, instr->src2)); emu_set_gpr_reg(emu, REG_REM, --rem); emu_dump_state_change(emu); emu_set_gpr_reg(emu, REG_DATA, - emu_get_gpr_reg(emu, instr->alu.src2)); + emu_get_gpr_reg(emu, instr->src2)); } else if (m == 3) { emu_set_gpr_reg(emu, REG_REM, --rem); emu_dump_state_change(emu); emu_set_gpr_reg(emu, REG_DATA, - emu_get_gpr_reg(emu, instr->alu.src2)); + emu_get_gpr_reg(emu, instr->src2)); emu_set_gpr_reg(emu, REG_REM, --rem); emu_dump_state_change(emu); - emu_set_gpr_reg(emu, instr->alu.dst, - emu_get_gpr_reg(emu, instr->alu.src2)); + emu_set_gpr_reg(emu, instr->dst, + emu_get_gpr_reg(emu, instr->src2)); emu_set_gpr_reg(emu, REG_REM, --rem); emu_dump_state_change(emu); emu_set_gpr_reg(emu, REG_DATA, - emu_get_gpr_reg(emu, instr->alu.src2)); + emu_get_gpr_reg(emu, instr->src2)); } } break; } - case OPC_CWRITE6: { - uint32_t src1 = emu_get_gpr_reg(emu, instr->control.src1); - uint32_t src2 = emu_get_gpr_reg(emu, instr->control.src2); - - if (instr->control.flags == 0x4) { - emu_set_gpr_reg(emu, instr->control.src2, src2 + instr->control.uimm); - } else if (instr->control.flags && !emu->quiet) { - printf("unhandled flags: %x\n", instr->control.flags); - } - - emu_set_control_reg(emu, src2 + instr->control.uimm, src1); + case OPC_MOVI: { + uint32_t val = instr->immed << instr->shift; + emu_set_gpr_reg(emu, instr->dst, val); break; } - case OPC_CREAD6: { - uint32_t src2 = emu_get_gpr_reg(emu, instr->control.src2); + case OPC_CWRITE: { + uint32_t src1 = emu_get_gpr_reg(emu, instr->src1); + uint32_t src2 = emu_get_gpr_reg(emu, instr->src2); - if (instr->control.flags == 0x4) { - emu_set_gpr_reg(emu, instr->control.src2, src2 + instr->control.uimm); - } else if (instr->control.flags && !emu->quiet) { - printf("unhandled flags: %x\n", instr->control.flags); + if (instr->bit == 0x4) { + emu_set_gpr_reg(emu, instr->src2, src2 + instr->immed); + } else if (instr->bit && !emu->quiet) { + printf("unhandled flags: %x\n", instr->bit); } - emu_set_gpr_reg(emu, instr->control.src1, - emu_get_control_reg(emu, src2 + instr->control.uimm)); + emu_set_control_reg(emu, src2 + instr->immed, src1); break; } - case OPC_LOAD6: { - uintptr_t addr = load_store_addr(emu, instr->control.src2) + - instr->control.uimm; + case OPC_CREAD: { + uint32_t src1 = emu_get_gpr_reg(emu, instr->src1); - if (instr->control.flags == 0x4) { - uint32_t src2 = emu_get_gpr_reg(emu, instr->control.src2); - emu_set_gpr_reg(emu, instr->control.src2, src2 + instr->control.uimm); - } else if (instr->control.flags && !emu->quiet) { - printf("unhandled flags: %x\n", instr->control.flags); + if (instr->bit == 0x4) { + emu_set_gpr_reg(emu, instr->src1, src1 + instr->immed); + } else if (instr->bit && !emu->quiet) { + printf("unhandled flags: %x\n", instr->bit); + } + + emu_set_gpr_reg(emu, instr->dst, + emu_get_control_reg(emu, src1 + instr->immed)); + break; + } + case OPC_LOAD: { + uintptr_t addr = load_store_addr(emu, instr->src1) + + instr->immed; + + if (instr->bit == 0x4) { + uint32_t src1 = emu_get_gpr_reg(emu, instr->src1); + emu_set_gpr_reg(emu, instr->src1, src1 + instr->immed); + } else if (instr->bit && !emu->quiet) { + printf("unhandled flags: %x\n", instr->bit); } uint32_t val = emu_mem_read_dword(emu, addr); - emu_set_gpr_reg(emu, instr->control.src1, val); + emu_set_gpr_reg(emu, instr->dst, val); break; } - case OPC_STORE6: { - uintptr_t addr = load_store_addr(emu, instr->control.src2) + - instr->control.uimm; + case OPC_STORE: { + uintptr_t addr = load_store_addr(emu, instr->src2) + + instr->immed; - if (instr->control.flags == 0x4) { - uint32_t src2 = emu_get_gpr_reg(emu, instr->control.src2); - emu_set_gpr_reg(emu, instr->control.src2, src2 + instr->control.uimm); - } else if (instr->control.flags && !emu->quiet) { - printf("unhandled flags: %x\n", instr->control.flags); + if (instr->bit == 0x4) { + uint32_t src2 = emu_get_gpr_reg(emu, instr->src2); + emu_set_gpr_reg(emu, instr->src2, src2 + instr->immed); + } else if (instr->bit && !emu->quiet) { + printf("unhandled flags: %x\n", instr->bit); } - uint32_t val = emu_get_gpr_reg(emu, instr->control.src1); + uint32_t val = emu_get_gpr_reg(emu, instr->src1); emu_mem_write_dword(emu, addr, val); break; } case OPC_BRNEI ... OPC_BREQB: { - uint32_t off = emu->gpr_regs.pc + instr->br.ioff; - uint32_t src = emu_get_gpr_reg(emu, instr->br.src); + uint32_t off = emu->gpr_regs.pc + instr->offset; + uint32_t src = emu_get_gpr_reg(emu, instr->src1); - if (opc == OPC_BRNEI) { - if (src != instr->br.bit_or_imm) + if (instr->opc == OPC_BRNEI) { + if (src != instr->immed) emu->branch_target = off; - } else if (opc == OPC_BREQI) { - if (src == instr->br.bit_or_imm) + } else if (instr->opc == OPC_BREQI) { + if (src == instr->immed) emu->branch_target = off; - } else if (opc == OPC_BRNEB) { - if (!(src & (1 << instr->br.bit_or_imm))) + } else if (instr->opc == OPC_BRNEB) { + if (!(src & (1 << instr->bit))) emu->branch_target = off; - } else if (opc == OPC_BREQB) { - if (src & (1 << instr->br.bit_or_imm)) + } else if (instr->opc == OPC_BREQB) { + if (src & (1 << instr->bit)) emu->branch_target = off; } else { assert(0); @@ -281,11 +276,11 @@ emu_instr(struct emu *emu, afuc_instr *instr) * presumably the return PC is two instructions later: */ emu->call_stack[emu->call_stack_idx++] = emu->gpr_regs.pc + 2; - emu->branch_target = instr->call.uoff; + emu->branch_target = instr->literal; break; } - case OPC_WIN: { + case OPC_WAITIN: { assert(!emu->branch_target); emu->run_mode = false; emu->waitin = true; @@ -298,11 +293,11 @@ emu_instr(struct emu *emu, afuc_instr *instr) break; } default: - printf("unhandled opc: 0x%02x\n", opc); + printf("unhandled opc: 0x%02x\n", instr->opc); exit(1); } - if (rep) { + if (instr->rep) { assert(rem > 0); emu_set_gpr_reg(emu, REG_REM, --rem); } @@ -311,9 +306,26 @@ emu_instr(struct emu *emu, afuc_instr *instr) void emu_step(struct emu *emu) { - afuc_instr *instr = (void *)&emu->instrs[emu->gpr_regs.pc]; - afuc_opc opc; - bool rep; + struct afuc_instr *instr; + bool decoded = isa_decode((void *)&instr, + (void *)&emu->instrs[emu->gpr_regs.pc], + &(struct isa_decode_options) { + .gpu_id = gpuver, + }); + + if (!decoded) { + uint32_t instr_val = emu->instrs[emu->gpr_regs.pc]; + if ((instr_val >> 27) == 0) { + /* This is printed as an undecoded literal to show the immediate + * payload, but when executing it's just a NOP. + */ + instr = calloc(1, sizeof(struct afuc_instr)); + instr->opc = OPC_NOP; + } else { + printf("unmatched instruction: 0x%08x\n", instr_val); + exit(1); + } + } emu_main_prompt(emu); @@ -323,9 +335,7 @@ emu_step(struct emu *emu) bool waitin = emu->waitin; emu->waitin = false; - afuc_get_opc(instr, &opc, &rep); - - if (rep) { + if (instr->rep) { do { if (!emu_get_gpr_reg(emu, REG_REM)) break; @@ -380,6 +390,8 @@ emu_step(struct emu *emu) } emu_dump_state_change(emu); + + free(instr); } void diff --git a/src/freedreno/afuc/isa.h b/src/freedreno/afuc/isa.h new file mode 100644 index 00000000000..48e801b7515 --- /dev/null +++ b/src/freedreno/afuc/isa.h @@ -0,0 +1,68 @@ +/* + * Copyright © 2020 Google, Inc. + * Copyright © 2023 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _ISA_H_ +#define _ISA_H_ + +#include + +#include "compiler/isaspec/isaspec.h" +#include "afuc.h" + +static inline struct afuc_instr *__instruction_create(afuc_opc opc) +{ + struct afuc_instr *instr = calloc(1, sizeof(struct afuc_instr)); + + switch (opc) { +#define ALU(name) \ + case OPC_##name##I: \ + instr->opc = OPC_##name; \ + instr->has_immed = true; \ + break; + ALU(ADD) + ALU(ADDHI) + ALU(SUB) + ALU(SUBHI) + ALU(AND) + ALU(OR) + ALU(XOR) + ALU(NOT) + ALU(SHL) + ALU(USHR) + ALU(ISHR) + ALU(ROT) + ALU(MUL8) + ALU(MIN) + ALU(MAX) + ALU(CMP) +#undef ALU + + default: + instr->opc = opc; + } + + return instr; +} + +#endif /* _ISA_H_ */ diff --git a/src/freedreno/afuc/meson.build b/src/freedreno/afuc/meson.build index 6d209b587c8..a226d4240ad 100644 --- a/src/freedreno/afuc/meson.build +++ b/src/freedreno/afuc/meson.build @@ -40,6 +40,15 @@ afuc_lexer = custom_target( ] ) +encode_h = custom_target( + 'encode.h', + input: ['afuc.xml'], + output: 'encode.h', + command: [ + prog_isaspec_encode, '--xml', '@INPUT@', '--out-h', '@OUTPUT@' + ], +) + asm = executable( 'afuc-asm', [ @@ -48,6 +57,7 @@ asm = executable( 'util.h', afuc_lexer, afuc_parser, + encode_h, ], include_directories: [ inc_freedreno_rnn, inc_include, inc_src, inc_util, @@ -72,6 +82,16 @@ if with_tests ) endif +afuc_isa = custom_target( + 'afuc-isa', + input: ['afuc.xml'], + output: ['afuc-isa.c', 'afuc-isa.h'], + command: [ + prog_isaspec_decode, '--xml', '@INPUT@', + '--out-c', '@OUTPUT0@', '--out-h', '@OUTPUT1@', + ], +) + # Disasm requires mmaping >4GB if cc.sizeof('size_t') > 4 disasm = executable( @@ -85,6 +105,7 @@ if cc.sizeof('size_t') > 4 'emu-ui.c', 'util.c', 'util.h', + afuc_isa, ], include_directories: [ inc_freedreno, @@ -96,8 +117,7 @@ if cc.sizeof('size_t') > 4 link_with: [ libfreedreno_rnn, ], - dependencies: [ - ], + dependencies: [idep_mesautil, idep_isaspec_decode], build_by_default : with_tools.contains('freedreno'), install: install_fd_decode_tools, ) diff --git a/src/freedreno/afuc/parser.y b/src/freedreno/afuc/parser.y index 5a8164e59fe..1e8676f5041 100644 --- a/src/freedreno/afuc/parser.y +++ b/src/freedreno/afuc/parser.y @@ -50,12 +50,12 @@ void yyerror(const char *error) fprintf(stderr, "error at line %d: %s\n", yyget_lineno(), error); } -static struct asm_instruction *instr; /* current instruction */ +static struct afuc_instr *instr; /* current instruction */ static void -new_instr(int tok) +new_instr(afuc_opc opc) { - instr = next_instr(tok); + instr = next_instr(opc); } static void @@ -182,9 +182,10 @@ instr_or_label: instr_r | T_LABEL_DECL { decl_label($1); } /* instructions that can optionally have (rep) flag: */ -instr_r: alu_instr +instr_r: alu_instr { instr->xmov = 0; } | T_XMOV alu_instr { instr->xmov = $1; } -| config_instr +| load_instr +| store_instr /* need to special case: * - not (single src, possibly an immediate) @@ -193,36 +194,36 @@ instr_r: alu_instr * from the other ALU instructions: */ -alu_msb_instr: T_OP_MSB reg ',' reg { new_instr($1); dst($2); src2($4); } +alu_msb_instr: T_OP_MSB reg ',' reg { new_instr(OPC_MSB); dst($2); src1($4); } -alu_not_instr: T_OP_NOT reg ',' reg { new_instr($1); dst($2); src2($4); } -| T_OP_NOT reg ',' immediate { new_instr($1); dst($2); immed($4); } +alu_not_instr: T_OP_NOT reg ',' reg { new_instr(OPC_NOT); dst($2); src1($4); } +| T_OP_NOT reg ',' immediate { new_instr(OPC_NOT); dst($2); immed($4); } -alu_mov_instr: T_OP_MOV reg ',' reg { new_instr($1); dst($2); src1($4); } +alu_mov_instr: T_OP_MOV reg ',' reg { new_instr(OPC_OR); dst($2); src1(0); src2($4); } | T_OP_MOV reg ',' immediate T_LSHIFT immediate { - new_instr($1); dst($2); immed($4); shift($6); + new_instr(OPC_MOVI); dst($2); immed($4); shift($6); } -| T_OP_MOV reg ',' immediate { new_instr($1); dst($2); immed($4); } +| T_OP_MOV reg ',' immediate { new_instr(OPC_MOVI); dst($2); immed($4); shift(0); } | T_OP_MOV reg ',' T_LABEL_REF T_LSHIFT immediate { - new_instr($1); dst($2); label($4); shift($6); + new_instr(OPC_MOVI); dst($2); label($4); shift($6); } -| T_OP_MOV reg ',' T_LABEL_REF { new_instr($1); dst($2); label($4); } +| T_OP_MOV reg ',' T_LABEL_REF { new_instr(OPC_MOVI); dst($2); label($4); shift(0); } -alu_2src_op: T_OP_ADD { new_instr($1); } -| T_OP_ADDHI { new_instr($1); } -| T_OP_SUB { new_instr($1); } -| T_OP_SUBHI { new_instr($1); } -| T_OP_AND { new_instr($1); } -| T_OP_OR { new_instr($1); } -| T_OP_XOR { new_instr($1); } -| T_OP_SHL { new_instr($1); } -| T_OP_USHR { new_instr($1); } -| T_OP_ISHR { new_instr($1); } -| T_OP_ROT { new_instr($1); } -| T_OP_MUL8 { new_instr($1); } -| T_OP_MIN { new_instr($1); } -| T_OP_MAX { new_instr($1); } -| T_OP_CMP { new_instr($1); } +alu_2src_op: T_OP_ADD { new_instr(OPC_ADD); } +| T_OP_ADDHI { new_instr(OPC_ADDHI); } +| T_OP_SUB { new_instr(OPC_SUB); } +| T_OP_SUBHI { new_instr(OPC_SUBHI); } +| T_OP_AND { new_instr(OPC_AND); } +| T_OP_OR { new_instr(OPC_OR); } +| T_OP_XOR { new_instr(OPC_XOR); } +| T_OP_SHL { new_instr(OPC_SHL); } +| T_OP_USHR { new_instr(OPC_USHR); } +| T_OP_ISHR { new_instr(OPC_ISHR); } +| T_OP_ROT { new_instr(OPC_ROT); } +| T_OP_MUL8 { new_instr(OPC_MUL8); } +| T_OP_MIN { new_instr(OPC_MIN); } +| T_OP_MAX { new_instr(OPC_MAX); } +| T_OP_CMP { new_instr(OPC_CMP); } alu_2src_instr: alu_2src_op reg ',' reg ',' reg { dst($2); src1($4); src2($6); } | alu_2src_op reg ',' reg ',' immediate { dst($2); src1($4); immed($6); } @@ -232,30 +233,33 @@ alu_instr: alu_2src_instr | alu_not_instr | alu_mov_instr -config_op: T_OP_CWRITE { new_instr($1); } -| T_OP_CREAD { new_instr($1); } -| T_OP_LOAD { new_instr($1); } -| T_OP_STORE { new_instr($1); } +load_op: T_OP_LOAD { new_instr(OPC_LOAD); } +| T_OP_CREAD { new_instr(OPC_CREAD); } +store_op: T_OP_STORE { new_instr(OPC_STORE); } +| T_OP_CWRITE { new_instr(OPC_CWRITE); } -config_instr: config_op reg ',' '[' reg '+' immediate ']' ',' immediate { +load_instr: load_op reg ',' '[' reg '+' immediate ']' ',' immediate { + dst($2); src1($5); immed($7); bit($10); +} +store_instr: store_op reg ',' '[' reg '+' immediate ']' ',' immediate { src1($2); src2($5); immed($7); bit($10); } -branch_op: T_OP_BRNE { new_instr($1); } -| T_OP_BREQ { new_instr($1); } +branch_op: T_OP_BRNE { new_instr(OPC_BRNE); } +| T_OP_BREQ { new_instr(OPC_BREQ); } branch_instr: branch_op reg ',' T_BIT ',' T_LABEL_REF { src1($2); bit($4); label($6); } | branch_op reg ',' immediate ',' T_LABEL_REF { src1($2); immed($4); label($6); } -other_instr: T_OP_CALL T_LABEL_REF { new_instr($1); label($2); } -| T_OP_PREEMPTLEAVE T_LABEL_REF { new_instr($1); label($2); } -| T_OP_SETSECURE reg ',' T_LABEL_REF { new_instr($1); src1($2); label($4); } -| T_OP_RET { new_instr($1); } -| T_OP_IRET { new_instr($1); } -| T_OP_JUMP T_LABEL_REF { new_instr($1); label($2); } -| T_OP_WAITIN { new_instr($1); } -| T_OP_NOP { new_instr($1); } -| T_LITERAL { new_instr($1); literal($1); } +other_instr: T_OP_CALL T_LABEL_REF { new_instr(OPC_CALL); label($2); } +| T_OP_PREEMPTLEAVE T_LABEL_REF { new_instr(OPC_PREEMPTLEAVE); label($2); } +| T_OP_SETSECURE reg ',' T_LABEL_REF { new_instr(OPC_SETSECURE); src1($2); label($4); } +| T_OP_RET { new_instr(OPC_RET); } +| T_OP_IRET { new_instr(OPC_IRET); } +| T_OP_JUMP T_LABEL_REF { new_instr(OPC_JUMP); label($2); } +| T_OP_WAITIN { new_instr(OPC_WAITIN); } +| T_OP_NOP { new_instr(OPC_NOP); } +| T_LITERAL { new_instr(OPC_RAW_LITERAL); literal($1); } reg: T_REGISTER