From 6d94f575d2137f6f31353df6b0d6279e9d206ea8 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 13 Dec 2020 16:20:40 -0800 Subject: [PATCH] freedreno/hw/isa: Add description of ir3 ISA Signed-off-by: Rob Clark Part-of: --- src/freedreno/isa/ir3-cat0.xml | 267 ++++++++++ src/freedreno/isa/ir3-cat1.xml | 329 ++++++++++++ src/freedreno/isa/ir3-cat2.xml | 344 ++++++++++++ src/freedreno/isa/ir3-cat3.xml | 229 ++++++++ src/freedreno/isa/ir3-cat4.xml | 120 +++++ src/freedreno/isa/ir3-cat5.xml | 643 +++++++++++++++++++++++ src/freedreno/isa/ir3-cat6.xml | 872 +++++++++++++++++++++++++++++++ src/freedreno/isa/ir3-cat7.xml | 63 +++ src/freedreno/isa/ir3-common.xml | 353 +++++++++++++ src/freedreno/isa/ir3.xml | 90 ++++ src/freedreno/isa/meson.build | 103 ++++ src/freedreno/meson.build | 1 + 12 files changed, 3414 insertions(+) create mode 100644 src/freedreno/isa/ir3-cat0.xml create mode 100644 src/freedreno/isa/ir3-cat1.xml create mode 100644 src/freedreno/isa/ir3-cat2.xml create mode 100644 src/freedreno/isa/ir3-cat3.xml create mode 100644 src/freedreno/isa/ir3-cat4.xml create mode 100644 src/freedreno/isa/ir3-cat5.xml create mode 100644 src/freedreno/isa/ir3-cat6.xml create mode 100644 src/freedreno/isa/ir3-cat7.xml create mode 100644 src/freedreno/isa/ir3-common.xml create mode 100644 src/freedreno/isa/ir3.xml create mode 100644 src/freedreno/isa/meson.build diff --git a/src/freedreno/isa/ir3-cat0.xml b/src/freedreno/isa/ir3-cat0.xml new file mode 100644 index 00000000000..bb6074011f4 --- /dev/null +++ b/src/freedreno/isa/ir3-cat0.xml @@ -0,0 +1,267 @@ + + + + + + + + + + + + + x + + + + + 000 + + src->cat0.immed + src->cat0.comp1 + src->cat0.comp2 + src->cat0.inv1 + src->cat0.inv2 + + + + + + + {SY}{SS}{EQ}{JP}{REPEAT}{NAME} + + 00000 + 000 + 000 + 000 + + + + xx0 + 0000 + + + + xx0 + 0110 + + + + xx0 + 0100 + + + + xx0 + 0111 + + + + xx0 + 1000 + + + + xx0 + 1001 + + + + xx0 + 1010 + + + + xx0 + 1011 + + + + SHader Prologue End + xx1 + 1000 + + + + xx1 + 1111 + + + + 
+ + {SY}{SS}{EQ}{JP}{NAME} {INV1}p0.{COMP1} + + 00000 + 000 + 000 + + Invert source condition + + + Predicate register (p0.c) component for source + + + + + xx0 + 0101 + + + + xx1 + 1101 + + + + xx1 + 1110 + + + + + + {SY}{SS}{JP}{NAME} #{IMMED} + + xxxxx + xxx + xxx + xxx + + + + xx0 + 0010 + + + + xx0 + 0011 + + + + xx1 + 0000 + + + + xx1 + 0101 + + + + SHader Prologue Start + xx1 + 0111 + + + + xx0 + 0001 + + + + + {SY}{SS}{EQ}{JP}{NAME}.{INDEX} #{IMMED} + + + 011 + xxx + xxx + + src->cat0.idx + + + + + xxxxx + 110 + xxx + xxx + + + + + {SY}{SS}{EQ}{JP}{NAME} {INV1}p0.{COMP1}, #{IMMED} + + xxxxx + xxx + + Invert source condition + + + Predicate register (p0.c) component for source + + + + + 000 + + + + 100 + + + + 101 + + + + + {SY}{SS}{EQ}{JP}{NAME} {INV1}p0.{COMP1}, {INV2}p0.{COMP2}, #{IMMED} + + xxxxx + + + Invert source 2 condition + + + Predicate register (p0.c) component for source 2 + + + + Invert source 1 condition + + + Predicate register (p0.c) component for source 1 + + + + + 001 + + + + 010 + + + + + \ No newline at end of file diff --git a/src/freedreno/isa/ir3-cat1.xml b/src/freedreno/isa/ir3-cat1.xml new file mode 100644 index 00000000000..e55c7357861 --- /dev/null +++ b/src/freedreno/isa/ir3-cat1.xml @@ -0,0 +1,329 @@ + + + + + + + + + + Unlike other instruction categories, cat1 can have relative dest + + + + ({OFFSET} == 0) && {DST_REL} + + + r<a0.x> + + + + + + {DST_REL} + + + r<a0.x + {OFFSET}> + + + + + {DST} + + + + src + src->array.offset + + + + + + + + + + + + + + + + 001 + + src->regs[1] + !!(src->regs[1]->flags & IR3_REG_R) + !!(src->flags & IR3_INSTR_UL) + src->cat1.dst_type + !!(src->regs[0]->flags & IR3_REG_RELATIV) + src->cat1.src_type + !!(src->regs[0]->flags & IR3_REG_EVEN) + !!(src->regs[0]->flags & IR3_REG_POS_INF) + + + + + + + ({DST} == 0xf4 /* a0.x */) && ({SRC_TYPE} == 4 /* s16 */) && ({DST_TYPE} == 4) + + + {SY}{SS}{JP}{REPEAT}{UL}mova {EVEN}{POS_INF}a0.x, {SRC} + + 11110100 + 100 + 100 + + + + ({DST} == 0xf5 /* a0.y */) && 
({SRC_TYPE} == 2 /* u16 */) && ({DST_TYPE} == 2) + + + {SY}{SS}{JP}{REPEAT}{UL}mova1 {EVEN}{POS_INF}a1.x, {SRC} + + 11110101 + 010 + 010 + + + + {SRC_TYPE} != {DST_TYPE} + + + {SY}{SS}{JP}{REPEAT}{UL}cov.{SRC_TYPE}{DST_TYPE} {EVEN}{POS_INF}{DST_HALF}{DST}, {SRC} + + + + {SY}{SS}{JP}{REPEAT}{UL}mov.{SRC_TYPE}{DST_TYPE} {EVEN}{POS_INF}{DST_HALF}{DST}, {SRC} + + 00 + + + ({SRC_TYPE} == 0) /* f16 */ || + ({SRC_TYPE} == 2) /* u16 */ || + ({SRC_TYPE} == 4) /* s16 */ || + ({SRC_TYPE} == 6) /* u8 */ || + ({SRC_TYPE} == 7) /* s8 */ + + + + + ({DST_TYPE} == 0) /* f16 */ || + ({DST_TYPE} == 2) /* u16 */ || + ({DST_TYPE} == 4) /* s16 */ || + ({DST_TYPE} == 6) /* u8 */ || + ({DST_TYPE} == 7) /* s8 */ + + + + + + + + + + + + {SRC_TYPE} == 0 /* f16 */ + + + h({IMMED}) + + + + + + {SRC_TYPE} == 1 /* f32 */ + + + ({IMMED}) + + + + + + ({SRC_TYPE} == 3 /* u32 */) && ({IMMED} > 0x1000) + + + 0x{IMMED} + + + + + + {SRC_TYPE} == 4 /* s16 */ + + + + + + {SRC_TYPE} == 5 /* s32 */ + + + + + + {IMMED} + + + + + src->uim_val + + + + + + {SRC_R}{HALF}{CONST} + + + + src + + + + + + {SRC_R}{HALF}{SRC} + + + + src + + + + + + {SRC_R}{HALF}{SRC} + + + + src + + + + + + {SRC_R}{HALF}{SRC} + + + + src + + + + + + + + + + 0 + 10 + + + + + + + + 000000000000000000000 + + 01 + + + + + + + + 000000000000000000000000 + + 00 + + + + 1 + 00000000000000000000 + + 00 + + + + + + + + 0 + + + + + + + + 1 + + + + + + + {SY}{SS}{JP}{UL}movmsk.w{W} {DST} + + + + ({REPEAT} + 1) * 32 + + + 00000000000000000000000000000000 + 0 + 011 + 011 + 00 + 11 + + + + util_last_bit(src->regs[0]->wrmask) - 1 + + + + + diff --git a/src/freedreno/isa/ir3-cat2.xml b/src/freedreno/isa/ir3-cat2.xml new file mode 100644 index 00000000000..d62c933d3aa --- /dev/null +++ b/src/freedreno/isa/ir3-cat2.xml @@ -0,0 +1,344 @@ + + + + + + + + + + + + + + + + Destination register is opposite precision as source, ie. + if {FULL} is true then destination is half precision, and + vice versa. 
+ + + + + + Full precision source registers + + + + 010 + + + + + + !!(src->flags & IR3_INSTR_SAT) + + ((src->regs[0]->num >> 2) == 62) ? 0 : + !!((src->regs[1]->flags ^ src->regs[0]->flags) & IR3_REG_HALF) + + !!(src->regs[0]->flags & IR3_REG_EI) + !(src->regs[1]->flags & IR3_REG_HALF) + extract_SRC1_R(src) + extract_SRC2_R(src) + + + + + + + {SY}{SS}{JP}{SAT}(nop{NOP}) {UL}{NAME} {EI}{DST_HALF}{DST}, {SRC1} + + + + + + + + + {SY}{SS}{JP}{SAT}{REPEAT}{UL}{NAME} {EI}{DST_HALF}{DST}, {SRC1} + + xxxxxxxxxxxxxxxx + xxx + + + + + + + + + + + + {SY}{SS}{JP}{SAT}(nop{NOP}) {UL}{NAME} {EI}{DST_HALF}{DST}, {SRC1}, {SRC2} + + + + + + + + + + + + + {SY}{SS}{JP}{SAT}{REPEAT}{UL}{NAME} {EI}{DST_HALF}{DST}, {SRC1}, {SRC2} + + + + + + + + + + xxx + + + + + + + {SY}{SS}{JP}{SAT}(nop{NOP}) {UL}{NAME}.{COND} {EI}{DST_HALF}{DST}, {SRC1}, {SRC2} + + + + + + + + + + + + + {SY}{SS}{JP}{SAT}{REPEAT}{UL}{NAME}.{COND} {EI}{DST_HALF}{DST}, {SRC1}, {SRC2} + + + + + + + + + + + + src->cat2.condition + + + + + + 000000 + + + + 000001 + + + + 000010 + + + + 000011 + + + + 000100 + + + + 000101 + + + + 000110 + + + + 000111 + + + + 001001 + + + + 001010 + + + + 001011 + + + + 001100 + + + + 001101 + + + + 010000 + + + + 010001 + + + + 010010 + + + + 010011 + + + + 010100 + + + + 010101 + + + + 010110 + + + + 010111 + + + + 011000 + + + + 011001 + + + + 011010 + + + + 011100 + + + + 011101 + + + + 011110 + + + + 011111 + + + + 100001 + + + + 100010 + + + + 110000 + + + + 110001 + + + + 110010 + + + + 110011 + + + + 110100 + + + + 110101 + + + + 110110 + + + + 110111 + + + + 111000 + + + + 111001 + + + + 111010 + + + + 111011 + + + + 111100 + + + + 111101 + + + + 111110 + + + + 111111 + + + diff --git a/src/freedreno/isa/ir3-cat3.xml b/src/freedreno/isa/ir3-cat3.xml new file mode 100644 index 00000000000..8ae7d1e31d1 --- /dev/null +++ b/src/freedreno/isa/ir3-cat3.xml @@ -0,0 +1,229 @@ + + + + + + + + + + cat3 src1 and src2, some parts are similar to cat2/cat4 src + encoding, but a few extra bits 
trimmed out to squeeze in the + 3rd src register (dropping (abs), immed encoding, and moving + a few other bits elsewhere) + + + + + + + {HALF}{SRC} + + + 00000 + + src + + + + + + {HALF}c{CONST}.{SWIZ} + + + + 10 + + src->num >> 2 + src->num & 0x3 + + + + + 01 + + src->array.offset + + + + + + {HALF}r<a0.x + {OFFSET}> + + + 0 + + + + + {HALF}c<a0.x + {OFFSET}> + + + 1 + + + + + + {SY}{SS}{JP}{SAT}(nop{NOP}) {UL}{NAME} {DST_HALF}{DST}, {SRC1_NEG}{SRC1}, {SRC2_NEG}{HALF}{SRC2}, {SRC3_NEG}{SRC3} + + + + + {SY}{SS}{JP}{SAT}{REPEAT}{UL}{NAME} {DST_HALF}{DST}, {SRC1_NEG}{SRC1_R}{SRC1}, {SRC2_NEG}{SRC2_R}{HALF}{SRC2}, {SRC3_NEG}{SRC3_R}{SRC3} + + + + + + x + + + + + + + + + + + + + + + + + The source precision is determined by the instruction + opcode. If {DST_CONV} the result is widened/narrowed + to the opposite precision. + + + + + + + 011 + + + + !!(src->regs[1]->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)) + extract_SRC1_R(src) + extract_SRC2_R(src) + !!(src->regs[3]->flags & IR3_REG_R) + !!(src->regs[2]->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)) + !!(src->regs[3]->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)) + src->regs[1] + + ((src->regs[0]->num >> 2) == 62) ? 0 : + !!((src->regs[1]->flags ^ src->regs[0]->flags) & IR3_REG_HALF) + + + + + + 0000 + + + + + 0001 + + + + + 0010 + + + + + 0011 + + + + + 0100 + + + + + 0101 + + + + + 0110 + + + + + 0111 + + + + + 1000 + + + + + 1001 + + + + + 1010 + + + + + 1011 + + + + + 1100 + + + + + 1101 + + + + + 1110 + + + + + 1111 + + + + diff --git a/src/freedreno/isa/ir3-cat4.xml b/src/freedreno/isa/ir3-cat4.xml new file mode 100644 index 00000000000..9a1ea49b117 --- /dev/null +++ b/src/freedreno/isa/ir3-cat4.xml @@ -0,0 +1,120 @@ + + + + + + + + + + {SY}{SS}{JP}{SAT}{REPEAT}{UL}{NAME} {DST_HALF}{DST}, {SRC} + + + + + + xxxxxxxxxxxxxxxx + + + + + + + + + Destination register is opposite precision as source, ie. + if {FULL} is true then destination is half precision, and + vice versa. 
+ + + + xxxxx + + Full precision source registers + + + + + 100 + + src->regs[1] + + ((src->regs[0]->num >> 2) == 62) ? 0 : + !!((src->regs[1]->flags ^ src->regs[0]->flags) & IR3_REG_HALF) + + !(src->regs[1]->flags & IR3_REG_HALF) + !!(src->regs[1]->flags & IR3_REG_R) + + + + + 000000 + + + + 000001 + + + + 000010 + + + + 000011 + + + + 000100 + + + + 000101 + + + + 000110 + + + + + + 001001 + + + + 001010 + + + + 001011 + + + diff --git a/src/freedreno/isa/ir3-cat5.xml b/src/freedreno/isa/ir3-cat5.xml new file mode 100644 index 00000000000..2568f40c253 --- /dev/null +++ b/src/freedreno/isa/ir3-cat5.xml @@ -0,0 +1,643 @@ + + + + + + + + + + The BASE field is actually split across BASE_LO and BASE_HI, + but '.baseN' should only appear in the bindless case.. the + easiest way to accomplish that is by splitting it out into a + bitset. We just arbitrarily map this to BASE_LO + + + {BINDLESS} + + .base{BASE} + + + + + + ({BASE_HI} * 2) | {BASE_LO} + + + src->cat5.tex_base & 0x1 + + + + + + {S2EN_BINDLESS} + + The s2en (indirect) or bindless case + + + {SY}{JP}{NAME}{3D}{A}{O}{P}{S}{S2EN}{UNIFORM}{BASE} {TYPE}({WRMASK}){DST_HALF}{DST}{SRC1}{SRC2}{SRC3}{A1} + + + + + + + + + + + + + + + + + + + + + The "normal" case, ie. 
not s2en (indirect) and/or bindless + + + {SY}{JP}{NAME}{3D}{A}{O}{P}{S} {TYPE}({WRMASK}){DST_HALF}{DST}{SRC1}{SRC2}{SAMP}{TEX} + + + + ({TYPE} == 0) /* f16 */ || + ({TYPE} == 2) /* u16 */ || + ({TYPE} == 4) /* s16 */ || + ({TYPE} == 6) /* u8 */ || + ({TYPE} == 7) /* s8 */ + + + + + + + + + + + + + + + 0x + 00 + + + + + + + + + + + + + 0 + + + + + + + + + + 101 + + extract_cat5_FULL(src) + src + src + src->regs[0]->wrmask + src + src + src->cat5.tex_base >> 1 + !!(src->flags & IR3_INSTR_3D) + !!(src->flags & IR3_INSTR_A) + !!(src->flags & IR3_INSTR_S) + !!(src->flags & (IR3_INSTR_S2EN | IR3_INSTR_B)) + !!(src->flags & IR3_INSTR_O) + !!(src->flags & IR3_INSTR_P) + extract_cat5_DESC_MODE(src) + + extract_cat5_SRC(src, 1) + extract_cat5_SRC(src, 2) + src->regs[1] + + + + + 00000 + + + + + + + + 00001 + + + + + + + + 00010 + + + + + + + + 00011 + + + + + + + + 00100 + + + + + + + + 00101 + + + + + + + + 00110 + + + + + + + + 00111 + + + + + + + + 01000 + + + + + + + + 01001 + + + + + + + + 01010 + + + + + + + + 01011 + + + + + + + + 01100 + + + + + + + + 01101 + + + + + + + + 01110 + + + + + + + + 01111 + + + + + + + + 10000 + + + + + + + + 10001 + + + + + + + + 10010 + + + + + + + + 10011 + + + + + + + + 10100 + + + + + + + + 10101 + + + + + + + + 10110 + + + + + + + + 10111 + + + + + + + + 11000 + + + + + + + + 11001 + + + + + + + + 11010 + + + + + + + + 11011 + + + + + + + + + + + + {NUM_SRC} > 0 + + , {HALF}{SRC} + + + + + 00000000 + + src + + + + + + {O} || ({NUM_SRC} > 1) + + , {HALF}{SRC} + + + + + 00000000 + + src + + + + + + {HAS_SAMP} + + , s#{SAMP} + + + + + 0000 + + src->cat5.samp + + + + + s2en (indirect) / bindless case with a1.x has 8b samp + + {HAS_SAMP} + + , s#{SAMP} + + + + + 00000000 + + src->cat5.samp + + + + + + {HAS_TEX} + + , t#{TEX} + + + + + 0000000 + + src->cat5.tex + + + + + s2en (indirect) / bindless case only has 4b tex + + {HAS_TEX} + + , t#{TEX} + + + + + 0000 + + + src->cat5.samp >> 4 + + + + + + + {HAS_TYPE} + + ({TYPE}) + + + + + + 
src->cat5.type + + + + + + + + We don't actually display this enum, but it is useful to + document the various cases + + TODO we should probably have an option for uniforms w/out + display strings, but which have 'C' names that can be used + to generate header that the compiler can use + + + + Use traditional GL binding model, get texture and sampler index + from src3 which is not presumed to be uniform. This is + backwards-compatible with earlier generations, where this field was + always 0 and nonuniform-indexed sampling always worked. + + + + + The sampler base comes from the low 3 bits of a1.x, and the sampler + and texture index come from src3 which is presumed to be uniform. + + + + + The texture and sampler share the same base, and the sampler and + texture index come from src3 which is *not* presumed to be uniform. + + + + + The sampler base comes from the low 3 bits of a1.x, and the sampler + and texture index come from src3 which is *not* presumed to be + uniform. + + + + + Use traditional GL binding model, get texture and sampler index + from src3 which is presumed to be uniform. + + + + + The texture and sampler share the same base, and the sampler and + texture index come from src3 which is presumed to be uniform. + + + + + The texture and sampler share the same base, get sampler index from low + 4 bits of src3 and texture index from high 4 bits. + + + + + The sampler base comes from the low 3 bits of a1.x, and the texture + index comes from the next 8 bits of a1.x. The sampler index is an + immediate in src3. 
+ + + + + + + {DESC_MODE} < 6 /* CAT5_BINDLESS_IMM */ + + + + + ({DESC_MODE} == 1) /* CAT5_BINDLESS_A1_UNIFORM */ || + ({DESC_MODE} == 2) /* CAT5_BINDLESS_NONUNIFORM */ || + ({DESC_MODE} == 3) /* CAT5_BINDLESS_A1_NONUNIFORM */ || + ({DESC_MODE} == 5) /* CAT5_BINDLESS_UNIFORM */ || + ({DESC_MODE} == 6) /* CAT5_BINDLESS_IMM */ || + ({DESC_MODE} == 7) /* CAT5_BINDLESS_A1_IMM */ + + + + + ({DESC_MODE} == 1) /* CAT5_BINDLESS_A1_UNIFORM */ || + ({DESC_MODE} == 3) /* CAT5_BINDLESS_A1_NONUNIFORM */ || + ({DESC_MODE} == 7) /* CAT5_BINDLESS_A1_IMM */ + + + + + ({DESC_MODE} == 1) /* CAT5_BINDLESS_A1_UNIFORM */ || + ({DESC_MODE} == 4) /* CAT5_UNIFORM */ || + ({DESC_MODE} == 5) /* CAT5_BINDLESS_UNIFORM */ + + + + bindless/indirect src3, which can either be GPR or samp/tex + + + , {SRC_HALF}{SRC} + + + + !{BINDLESS} + + + + + In the case that a1.x is used, all 8 bits encode sampler + + + {SAMP} + + + + + {SAMP}{TEX} + + + + + + + + + s->instr + s->instr + src + + + + diff --git a/src/freedreno/isa/ir3-cat6.xml b/src/freedreno/isa/ir3-cat6.xml new file mode 100644 index 00000000000..fd7063b1645 --- /dev/null +++ b/src/freedreno/isa/ir3-cat6.xml @@ -0,0 +1,872 @@ + + + + + + + + + + + 110 + + src->cat6.type + + + + + + + + + + + LoaD Global + + + {SY}{JP}{NAME}.{TYPE} {DST}, g[{SRC1}+{SRC2}], {SIZE} + + + + + {SY}{JP}{NAME}.{TYPE} {DST}, g[{SRC1}{OFF}], {SIZE} + + !{SRC2_REG} + + + + 1 + + 00000 + + + 1 + + + xxxxxxxxx + 00 + 00000 + + !(src->regs[2]->flags & IR3_REG_IMMED) + src->regs[2] + src->regs[2]->iim_val + src->regs[3]->uim_val + + + + + + STore Global + + + {SY}{JP}{NAME}.{TYPE} g[{SRC1}+{SRC2}], {SRC3}, {SIZE} + + + + + {SY}{JP}{NAME}.{TYPE} g[{SRC1}{OFF}], {SRC3}, {SIZE} + + !{G} + + ({OFF_HI} << 8) | {OFF_LO} + + + + + + x + + 00000 + xxxxxxxx + 1x + + + + + + x + 00011 + + src->regs[3]->uim_val + src->regs[4] + 1 + src->regs[2] + (src->flags & IR3_INSTR_G) && !(src->regs[4]->flags & IR3_REG_IMMED) + src->cat6.dst_offset + src->cat6.dst_offset >> 8 + + + + + + 1 + + + 
x + 1 + + + xxxxxxxxx + xx + + src->regs[2]->uim_val + src->regs[1] + src->regs[3]->uim_val + + + + + + LoaD Local + + + {SY}{JP}{NAME}.{TYPE} {DST}, l[{SRC}{OFF}], {SIZE} + + 00001 + + + + + LoaD Private + + + {SY}{JP}{NAME}.{TYPE} {DST}, p[{SRC}{OFF}], {SIZE} + + 00010 + + + + + LoaD Local (variant used for passing data between geom stages) + + + {SY}{JP}{NAME}.{TYPE} {DST}, l[{SRC}{OFF}], {SIZE} + + 01010 + + + + + LoaD Local Varying - read directly from varying storage + + + {SY}{JP}{NAME}.{TYPE} {DST}, l[{OFF}], {SIZE} + + 0 + + xxxxxxxx + 11 + + + xxxxxxxxx + xx + 11111 + + src->regs[2]->uim_val + src->regs[1]->uim_val + + + + + + ({OFF_HI} << 8) | {OFF_LO} + + + + + xxxxxxxxx + 1 + + + 1 + + xx + + + src->cat6.dst_offset >> 8 + src->cat6.dst_offset & 0xff + src->regs[2] + src->regs[1] + src->regs[3]->uim_val + + + + + + STore Local + + + {SY}{JP}{NAME}.{TYPE} l[{DST}{OFF}], {SRC}, {SIZE} + + x + 00100 + + + + + STore Private + + + {SY}{JP}{NAME}.{TYPE} p[{DST}{OFF}], {SRC}, {SIZE} + + 0 + 00101 + + + + + STore Local (variant used for passing data between geom stages) + + + {SY}{JP}{NAME}.{TYPE} l[{DST}{OFF}], {SRC}, {SIZE} + + x + 01011 + + + + + STore Const - used for shader prolog (between shps and shpe) + to store "uniform folded" values into CONST file + + NOTE: TYPE field actually seems to be set to different + values (ie f32 vs u32), but I *think* it does not matter. 
+ (There is SP_MODE_CONTROL.CONSTANT_DEMOTION_ENABLE, but + I think float results are already converted to 32b) + + NOTE: this could be the "old" encoding, although it + would conflict with stgb from earlier gens + + + {SY}{JP}{NAME} c[{DST}], {SRC}, {SIZE} + + + x + + xxxxxxxxxxxxxx + 1 + + xxxxx + + xxxxxxxxx + xx + 11100 + + src->regs[1]->uim_val + src->regs[2] + + + + + + {SY}{JP}{NAME}.{TYPE}.{D}d {DST}, g[{SSBO}] + + + + x + xxxxxxxx + + x + xx + xxxxxxxx + x + x + xxxxxxxx + + 0 + + + + x + + 01111 + + src->cat6.d - 1 + src->regs[1] + !!(src->regs[1]->flags & IR3_REG_IMMED) + + + + + + + {SY}{JP}{NAME}.{TYPED}.{D}d.{TYPE}.{TYPE_SIZE} {DST}, g[{SSBO}], {SRC1}, {SRC2} + + + + + + + x + xxxxxxxx + + + + + + + + + + + + + 0 + + + + x + + 11011 + + src->cat6.d - 1 + src + src->cat6.iim_val - 1 + src->regs[1] + !!(src->regs[1]->flags & IR3_REG_IMMED) + src->regs[2] + !!(src->regs[2]->flags & IR3_REG_IMMED) + src->regs[3] + !!(src->regs[3]->flags & IR3_REG_IMMED) + + + + + + {SY}{JP}{NAME}.{TYPED}.{D}d.{TYPE}.{TYPE_SIZE} g[{SSBO}], {SRC1}, {SRC2}, {SRC3} + + + + + + + 1 + + + + + xxxxxxxxx + + + + + + + + + + + + x + + + src->cat6.d - 1 + src + src->cat6.iim_val - 1 + src->regs[1] + !!(src->regs[1]->flags & IR3_REG_IMMED) + src->regs[2] + !!(src->regs[2]->flags & IR3_REG_IMMED) + src->regs[3] + !!(src->regs[3]->flags & IR3_REG_IMMED) + src->regs[4] + !!(src->regs[4]->flags & IR3_REG_IMMED) + + + + + 11100 + + + + 11101 + + + + + Base for atomic instructions (I think mostly a4xx+, as + a3xx didn't have real image/ssbo.. it was all just global). + Still used as of a6xx for local. + + NOTE that existing disasm and asm parser expect atomic inc/dec + to still have an extra src. For now, match that. 
+ + + + + {SY}{JP}{NAME}.{TYPED}.{D}d.{TYPE}.{TYPE_SIZE}.g {DST}, g[{SSBO}], {SRC1}, {SRC2}, {SRC3} + + + + + + + + + {SY}{JP}{NAME}.{TYPED}.{D}d.{TYPE}.{TYPE_SIZE}.l {DST}, l[{SRC1}], {SRC2} + + + + + + 1 + xxxxxxxx + + + + + + + + + + + + + x + 00000000 + + 0 + + !!(src->flags & IR3_INSTR_G) + src + src->cat6.d - 1 + src->cat6.iim_val - 1 + src->regs[1] + !!(src->regs[1]->flags & IR3_REG_IMMED) + extract_cat6_SRC(src, 1) + !!(extract_cat6_SRC(src, 1)->flags & IR3_REG_IMMED) + extract_cat6_SRC(src, 2) + !!(extract_cat6_SRC(src, 2)->flags & IR3_REG_IMMED) + extract_cat6_SRC(src, 3) + !!(extract_cat6_SRC(src, 3)->flags & IR3_REG_IMMED) + + + + + + + + + + + + + 10000 + + + + 10001 + + + + 10010 + + + + 10011 + + + + 10100 + + + + 10101 + + + + 10110 + + + + 10111 + + + + 11000 + + + + 11001 + + + + 11010 + + + + + + + + Base for new instruction encoding that started being used + with a6xx for instructions supporting bindless mode. + + + + + + + + + 00 + + + + 0 + 00000 + + extract_cat6_DESC_MODE(src) + src->cat6.iim_val - 1 + !!(src->flags & IR3_INSTR_B) + src + + + + + + LoaD Constant - UBO load + + + + {K} + + {SY}{JP}{NAME}.{TYPE_SIZE}.k.{MODE}{BASE} c[a1.x], {SRC1}, {SRC2} + + + + + + {SY}{JP}{NAME}.offset{OFFSET}.{TYPE_SIZE}.{MODE}{BASE} {DST}, {SRC1}, {SRC2} + + x + + x + 011110 + 1xx + + + + + + + + + + x11 + + 1 + + 0 + !!(src->regs[2]->flags & IR3_REG_IMMED) + src->cat6.d + src->regs[2] + src->regs[1] + + + + + + GET Shader Processor ID? + + + {SY}{JP}{NAME}.{TYPE} {DST} + + + 0 + xx + x + 100100 + x1xx + xxxxxxxx + + xxxxxxxx + + 1x + + + + + GET Wavefront ID + + + {SY}{JP}{NAME}.{TYPE} {DST} + + + 0 + xx + x + 100101 + x1xx + xxxxxxxx + + xxxxxxxx + + 1x + + + + + RESourceINFO - returns image/ssbo dimensions (3 components) + + + {SY}{JP}{NAME}.{TYPED}.{D}d.{TYPE}.{TYPE_SIZE}.{MODE}{BASE} {DST}, {SSBO} + + + + + + 0 + + + 001111 + 0110 + xxxxxxxx + + + + + + 1x + + src->cat6.d - 1 + src + src->regs[1] + src->regs[2] + + + + + + IBO (ie. 
Image/SSBO) instructions + + + {SY}{JP}{NAME}.{TYPED}.{D}d.{TYPE}.{TYPE_SIZE}.{MODE}{BASE} {SRC1}, {SRC2}, {SSBO} + + + + + + + + 0110 + + + + + + + + + src + src->cat6.d - 1 + src->regs[1] + src->regs[3] + src->regs[2] + + + + + + STore IBo + + 0 + 011101 + 10 + + + + + LoaD IBo + + x + 000110 + 10 + + src->regs[0] + + + + + 1 + 010000 + 11 + + + + 1 + 010001 + 11 + + + + 1 + 010010 + 11 + + + + + + 1 + 010101 + 11 + + + + 1 + 010110 + 11 + + + + 1 + 010111 + 11 + + + + 1 + 011000 + 11 + + + + 1 + 011001 + 11 + + + + 1 + 011010 + 11 + + + + + + {D_MINUS_ONE} + 1 + + + + {TYPE_SIZE_MINUS_ONE} + 1 + + + + + {G} + + + + + {TYPED} + + typed + + + + untyped + + + + src->cat6.typed + + + + + + {BINDLESS} + + .base{BASE} + + + + + + src->cat6.base + + + + + + Source value that can be either immed or gpr + + + {SRC_IM} + + {IMMED} + + + + + r{GPR}.{SWIZ} + + + + + src->num >> 2 + src->num & 0x3 + src->iim_val + + + + + {MODE} == 0 + + + + + Source mode for "new" a6xx+ instruction encodings + + + + Immediate index. + + + + + Index from a uniform register (ie. does not depend on flow control) + + + + + Index from a non-uniform register (ie. 
potentially depends on flow control) + + + + + \ No newline at end of file diff --git a/src/freedreno/isa/ir3-cat7.xml b/src/freedreno/isa/ir3-cat7.xml new file mode 100644 index 00000000000..5e21359dcaf --- /dev/null +++ b/src/freedreno/isa/ir3-cat7.xml @@ -0,0 +1,63 @@ + + + + + + + + + + {SY}{JP}{NAME}{G}{L}{R}{W} + + xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + xxxxxxxxxxxx + x + x1xxxx + + + + + + + + 111 + + src->cat7.w + src->cat7.r + src->cat7.l + src->cat7.g + + + + + 0000 + + + + 0001 + + + \ No newline at end of file diff --git a/src/freedreno/isa/ir3-common.xml b/src/freedreno/isa/ir3-common.xml new file mode 100644 index 00000000000..0be7eeb4045 --- /dev/null +++ b/src/freedreno/isa/ir3-common.xml @@ -0,0 +1,353 @@ + + + + + + + + + (({SRC1_R} != 0) || ({SRC2_R} != 0)) && ({REPEAT} == 0) + + + + {SRC1_R} | ({SRC2_R} << 1) + + + + + + {GPR} == 61 /* a0.* */ + + + + {GPR} == 62 /* p0.x */ + + + + + + a0.{SWIZ} + + 111101 + + + + p0.{SWIZ} + + 111110 + + + r{GPR}.{SWIZ} + + + + + src->num >> 2 + src->num & 0x3 + + + + + + c{CONST}.{SWIZ} + + + + + src->num >> 2 + src->num & 0x3 + + + + + {OFFSET} == 0 + + + + + + r<a0.x> + + + + r<a0.x + {OFFSET}> + + + + src->array.offset + + + + + + + c<a0.x> + + + + c<a0.x + {OFFSET}> + + + + src->array.offset + + + + + + + + Encoding for instruction source which can be GPR/CONST/IMMED + or relative GPR/CONST. 
+ + + extract_ABSNEG(src) + src + + + + + + + {ABSNEG}{SRC_R}h({IMMED}) + + + + {ABSNEG}{SRC_R}{IMMED} + + + 100 + + + src->uim_val + + + + + + Immediate with int->float lookup table: + + 0 -> 0.0 + 1 -> 0.5 + 2 -> 1.0 + 3 -> 2.0 + 4 -> e + 5 -> pi + 6 -> 1/pi + 7 -> 1/log2(e) + 8 -> log2(e) + 9 -> 1/log2(10) + 10 -> log2(10) + 11 -> 4.0 + + + + 101 + + + src->uim_val + + + + + + {ABSNEG}{SRC_R}{IMMED} + + 0 + + + + + {ABSNEG}{SRC_R}h{IMMED} + + 1 + + + + !{FULL} + + + + + {ABSNEG}{SRC_R}{HALF}{SRC} + + + + 000000 + + + + + + {ABSNEG}{SRC_R}{HALF}{SRC} + + + + x10 + + + + + 001 + + + + + + {ABSNEG}{SRC_R}{HALF}{SRC} + + + + 0 + + + + + {ABSNEG}{SRC_R}{HALF}{SRC} + + + + 1 + + + + + ({FULL} == {DST_CONV}) && ({DST} <= 0xf7 /* p0.x */) + + + + 1 + + + + 0 + + + + + 0 + + + 1 + + + 2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + int to float lookup table + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/freedreno/isa/ir3.xml b/src/freedreno/isa/ir3.xml new file mode 100644 index 00000000000..fc5cb20885c --- /dev/null +++ b/src/freedreno/isa/ir3.xml @@ -0,0 +1,90 @@ + + + + + + + + + + + + + Encoding of an ir3 instruction. All instructions are 64b. 
+ + + + + src->regs[0] + src->regs[1] + src->regs[2] + src->regs[3] + src->repeat + !!(src->flags & IR3_INSTR_SS) + !!(src->flags & IR3_INSTR_JP) + !!(src->flags & IR3_INSTR_SY) + !!(src->flags & IR3_INSTR_UL) + 0 + !!(src->flags & IR3_INSTR_SAT) + + + + + + + + + + + + + diff --git a/src/freedreno/isa/meson.build b/src/freedreno/isa/meson.build new file mode 100644 index 00000000000..4e08541ffca --- /dev/null +++ b/src/freedreno/isa/meson.build @@ -0,0 +1,103 @@ +# Copyright © 2020 Google, Inc + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +isa_depend_files = [ + 'ir3-common.xml', + 'ir3-cat0.xml', + 'ir3-cat1.xml', + 'ir3-cat2.xml', + 'ir3-cat3.xml', + 'ir3-cat4.xml', + 'ir3-cat5.xml', + 'ir3-cat6.xml', + 'ir3-cat7.xml', + 'isa.py', +] + +ir3_isa_c = custom_target( + 'ir3-isa.c', + input: ['decode.py', 'ir3.xml'], + output: 'ir3-isa.c', + command: [ + prog_python, '@INPUT0@', '@INPUT1@', '@OUTPUT@' + ], + depend_files: isa_depend_files, +) + +decode_files = [ + ir3_isa_c, + 'isa.h', + 'decode.h', + 'decode.c', +] + +libir3decode = static_library( + 'ir3decode', + decode_files, + dependencies: idep_mesautil, + include_directories: [ + inc_include, + inc_src, + # Hack for src/util/half_float.h indirect dependency on + # gallium headers: + inc_gallium, + ], + gnu_symbol_visibility: 'hidden', +) + +ir3disasm = executable( + 'ir3-disasm', + ['ir3-disasm.c'], + link_with: libir3decode, + build_by_default: with_tools.contains('freedreno'), + include_directories: [ + inc_src, + ], + install: false, +) + +encode_h = custom_target( + 'encode.h', + input: ['encode.py', 'ir3.xml'], + output: 'encode.h', + command: [ + prog_python, '@INPUT0@', '@INPUT1@', '@OUTPUT@' + ], + depend_files: isa_depend_files, +) + +encode_files = [ + encode_h, + 'encode.c', + 'isa.h', +] + +libir3encode = static_library( + 'ir3encode', + encode_files, + dependencies: [idep_mesautil, idep_nir], + include_directories: [ + inc_src, + inc_include, + inc_freedreno, + inc_gallium, + ], + gnu_symbol_visibility: 'hidden', +) diff --git a/src/freedreno/meson.build b/src/freedreno/meson.build index f6219502886..c29b8a3b871 100644 --- a/src/freedreno/meson.build +++ b/src/freedreno/meson.build @@ -42,6 +42,7 @@ install_fd_decode_tools = dep_libxml2.found() and prog_gzip.found() and \ subdir('common') subdir('registers') +subdir('isa') subdir('drm') subdir('ir2') subdir('ir3')