diff --git a/src/freedreno/ir3/instr-a3xx.h b/src/freedreno/ir3/instr-a3xx.h index 2332f35cf42..b8a6186b148 100644 --- a/src/freedreno/ir3/instr-a3xx.h +++ b/src/freedreno/ir3/instr-a3xx.h @@ -287,6 +287,7 @@ typedef enum { OPC_GETSPID = _OPC(6, 36), /* SP ID */ OPC_GETWID = _OPC(6, 37), /* wavefront ID */ OPC_GETFIBERID = _OPC(6, 38), /* fiber ID */ + OPC_SHFL = _OPC(6, 39), /* Logical opcodes for things that differ in a6xx+ */ OPC_STC = _OPC(6, 40), diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 7748c9bebdd..293e1badacd 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -260,6 +260,14 @@ typedef enum { ALIAS_MEM = 4, } ir3_alias_scope; +typedef enum { + SHFL_XOR = 1, + SHFL_UP = 2, + SHFL_DOWN = 3, + SHFL_RUP = 6, + SHFL_RDOWN = 7, +} ir3_shfl_mode; + typedef enum ir3_instruction_flags { /* (sy) flag is set on first instruction, and after sample * instructions (probably just on RAW hazard). @@ -416,6 +424,7 @@ struct ir3_instruction { unsigned d : 3; /* for ldc, component offset */ bool typed : 1; unsigned base : 3; + ir3_shfl_mode shfl_mode : 3; } cat6; struct { unsigned w : 1; /* write */ @@ -2956,6 +2965,7 @@ INSTR1(QUAD_SHUFFLE_DIAG) INSTR2NODST(LDC_K) INSTR2NODST(STC) INSTR2NODST(STSC) +INSTR2(SHFL) #ifndef GPU #elif GPU >= 600 INSTR4NODST(STIB); diff --git a/src/freedreno/ir3/ir3_lexer.l b/src/freedreno/ir3/ir3_lexer.l index be47864b81d..77bada5920a 100644 --- a/src/freedreno/ir3/ir3_lexer.l +++ b/src/freedreno/ir3/ir3_lexer.l @@ -382,6 +382,7 @@ static int parse_reg(const char *str) "getfiberid" return TOKEN(T_OP_GETFIBERID); "stc" return TOKEN(T_OP_STC); "stsc" return TOKEN(T_OP_STSC); +"shfl" return TOKEN(T_OP_SHFL); ("b16"|"b32"){1} ir3_yylval.str = yytext; return T_INSTR_TYPE; @@ -447,6 +448,12 @@ static int parse_reg(const char *str) "mem" return T_MOD_MEM; "rt" return T_MOD_RT; +"xor" return T_MOD_XOR; +"up" return T_MOD_UP; +"down" return T_MOD_DOWN; +"rup" return T_MOD_RUP; +"rdown" return T_MOD_RDOWN; + "h" return 'h'; "=" return '='; "(" return '('; diff --git a/src/freedreno/ir3/ir3_parser.y b/src/freedreno/ir3/ir3_parser.y index 712dfffbf7e..f4efc29bf1d 100644 --- a/src/freedreno/ir3/ir3_parser.y +++ b/src/freedreno/ir3/ir3_parser.y @@ -645,6 +645,7 @@ static void print_token(FILE *file, int type, YYSTYPE value) %token T_OP_GETFIBERID %token T_OP_STC %token T_OP_STSC +%token T_OP_SHFL /* category 7: */ %token T_OP_BAR @@ -716,6 +717,12 @@ static void print_token(FILE *file, int type, YYSTYPE value) %token T_MOD_MEM %token T_MOD_RT +%token T_MOD_XOR +%token T_MOD_UP +%token T_MOD_DOWN +%token T_MOD_RUP +%token T_MOD_RDOWN + %type integer offset uoffset %type flut_immed %type float @@ -1332,6 +1339,20 @@ cat6_stc: T_OP_STC { new_instr(OPC_STC); } cat6_type 'c' '[' const_dst ']' ',' src_reg ',' cat6_immed | T_OP_STSC { new_instr(OPC_STSC); } cat6_type 'c' '[' const_dst ']' ',' immediate ',' cat6_immed +cat6_shfl_mode: T_MOD_XOR { instr->cat6.shfl_mode = SHFL_XOR; } +| T_MOD_UP { instr->cat6.shfl_mode = SHFL_UP; } +| T_MOD_DOWN { instr->cat6.shfl_mode = SHFL_DOWN; } +| T_MOD_RUP { instr->cat6.shfl_mode = SHFL_RUP; } +| T_MOD_RDOWN { instr->cat6.shfl_mode = SHFL_RDOWN; } + /* This is added to make it easy to experiment with the + * unknown modes. + */ +| integer { instr->cat6.shfl_mode = $1; } + +cat6_shfl: + T_OP_SHFL { new_instr(OPC_SHFL); } '.' cat6_shfl_mode cat6_type dst ',' src ',' cat6_reg_or_immed + + cat6_todo: T_OP_G2L { new_instr(OPC_G2L); } | T_OP_L2G { new_instr(OPC_L2G); } | T_OP_RESFMT { new_instr(OPC_RESFMT); } @@ -1347,6 +1368,7 @@ cat6_instr: cat6_load | cat6_bindless_ldc | cat6_bindless_ibo | cat6_stc +| cat6_shfl | cat6_todo cat7_scope: '.' 'w' { instr->cat7.w = true; } diff --git a/src/freedreno/ir3/tests/disasm.c b/src/freedreno/ir3/tests/disasm.c index 480c71023ed..56a4374ae92 100644 --- a/src/freedreno/ir3/tests/disasm.c +++ b/src/freedreno/ir3/tests/disasm.c @@ -437,6 +437,21 @@ static const struct test { /* dEQP-VK.subgroups.quad.graphics.subgroupquadbroadcast_int */ INSTR_6XX(c0260001_00c98000, "getfiberid.u32 r0.y"), + /* dEQP-VK.subgroups.shuffle.compute.subgroupshuffleup_bvec4_constant */ + INSTR_6XX(c6e4400d_05800002, "shfl.up.u16 hr3.y, hr0.y, 5"), + /* dEQP-VK.subgroups.shuffle.compute.subgroupshuffleup_f16vec3 */ + INSTR_6XX(c6e44017_c0000018, "shfl.up.u16 hr5.w, hr3.x, r48.x"), + /* dEQP-VK.subgroups.shuffle.compute.subgroupshuffleup_uvec3_constant */ + INSTR_6XX(c6e64006_05800000, "shfl.up.u32 r1.z, r0.x, 5"), + /* dEQP-VK.subgroups.shuffle.compute.subgroupshuffleup_ivec3_dynamically_uniform */ + INSTR_6XX(c6e64007_05000004, "shfl.up.u32 r1.w, r0.z, r1.y"), + /* dEQP-VK.subgroups.shuffle.graphics.subgroupshuffledown_i8vec3 */ + INSTR_6XX(c6e46011_c1000014, "shfl.down.u16 hr4.y, hr2.z, r48.y"), + /* dEQP-VK.memory_model.write_after_read.ext.u32.coherent.fence_atomic.atomicwrite.subgroup.payload_local.image.guard_local.image.frag */ + INSTR_6XX(c6e62005_3f800008, "shfl.xor.u32 r1.y, r1.x, 63"), + /* dEQP-VK.subgroups.shuffle.graphics.subgroupshuffle_bvec4 */ + INSTR_6XX(c6e4c012_c0000020, "shfl.rup.u16 hr4.z, hr4.x, r48.x"), + /* Custom test since we've never seen the blob emit these. */ INSTR_6XX(c0260004_00490000, "getspid.u32 r1.x"), INSTR_6XX(c0260005_00494000, "getwid.u32 r1.y"), diff --git a/src/freedreno/isa/ir3-cat6.xml b/src/freedreno/isa/ir3-cat6.xml index 691caef169f..5ce35003c21 100644 --- a/src/freedreno/isa/ir3-cat6.xml +++ b/src/freedreno/isa/ir3-cat6.xml @@ -1322,7 +1322,55 @@ SOFTWARE. 11 + + + + + + + + + + Subgroup shuffle/rotate operations. + + shfl.xor, shfl.up, shfl.down are like Vulkan subgroupShuffleXor, + subgroupShuffleUp, subgroupShuffleDown except that the mask/delta + has to be dynamically uniform. + + shfl.rdown is Vulkan subgroupRotate. shfl.rup is like subgroupRotate + with a negative delta. + + NOTE: SRC2 is always a full register, regardless of TYPE. The blob + disassembles it as a half register when TYPE is u16 though. We don't + copy this mistake here. + + + + + + {SY}{JP}{NAME}.{MODE}.{TYPE} {TYPE_HALF}{DST}, {TYPE_HALF}{SRC1}, {SRC2} + + + 0 + + 00000000000000 + + + + + + xxxxx + + x + 10 + 11011 + + + src->cat6.shfl_mode + !!(src->srcs[1]->flags & IR3_REG_IMMED) + + {D_MINUS_ONE} + 1