ir3/isa: add isaspec definition for shfl
Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31358>
This commit is contained in:
@@ -287,6 +287,7 @@ typedef enum {
|
||||
OPC_GETSPID = _OPC(6, 36), /* SP ID */
|
||||
OPC_GETWID = _OPC(6, 37), /* wavefront ID */
|
||||
OPC_GETFIBERID = _OPC(6, 38), /* fiber ID */
|
||||
OPC_SHFL = _OPC(6, 39),
|
||||
|
||||
/* Logical opcodes for things that differ in a6xx+ */
|
||||
OPC_STC = _OPC(6, 40),
|
||||
|
||||
@@ -260,6 +260,14 @@ typedef enum {
|
||||
ALIAS_MEM = 4,
|
||||
} ir3_alias_scope;
|
||||
|
||||
/**
 * Mode selector for the shfl (subgroup shuffle/rotate) instruction.
 *
 * The numeric values are the encoding of the instruction's 3-bit MODE
 * field, so they must not be renumbered. Encodings 0, 4 and 5 are not
 * named here; the assembler grammar refers to them as "unknown modes".
 *
 * For xor/up/down the mask/delta operand has to be dynamically uniform.
 */
typedef enum {
   SHFL_XOR = 1,   /* like Vulkan subgroupShuffleXor */
   SHFL_UP = 2,    /* like Vulkan subgroupShuffleUp */
   SHFL_DOWN = 3,  /* like Vulkan subgroupShuffleDown */
   SHFL_RUP = 6,   /* like Vulkan subgroupRotate with a negative delta */
   SHFL_RDOWN = 7, /* Vulkan subgroupRotate */
} ir3_shfl_mode;
|
||||
|
||||
typedef enum ir3_instruction_flags {
|
||||
/* (sy) flag is set on first instruction, and after sample
|
||||
* instructions (probably just on RAW hazard).
|
||||
@@ -416,6 +424,7 @@ struct ir3_instruction {
|
||||
unsigned d : 3; /* for ldc, component offset */
|
||||
bool typed : 1;
|
||||
unsigned base : 3;
|
||||
ir3_shfl_mode shfl_mode : 3;
|
||||
} cat6;
|
||||
struct {
|
||||
unsigned w : 1; /* write */
|
||||
@@ -2956,6 +2965,7 @@ INSTR1(QUAD_SHUFFLE_DIAG)
|
||||
INSTR2NODST(LDC_K)
|
||||
INSTR2NODST(STC)
|
||||
INSTR2NODST(STSC)
|
||||
INSTR2(SHFL)
|
||||
#ifndef GPU
|
||||
#elif GPU >= 600
|
||||
INSTR4NODST(STIB);
|
||||
|
||||
@@ -382,6 +382,7 @@ static int parse_reg(const char *str)
|
||||
"getfiberid" return TOKEN(T_OP_GETFIBERID);
|
||||
"stc" return TOKEN(T_OP_STC);
|
||||
"stsc" return TOKEN(T_OP_STSC);
|
||||
"shfl" return TOKEN(T_OP_SHFL);
|
||||
|
||||
("b16"|"b32"){1} ir3_yylval.str = yytext; return T_INSTR_TYPE;
|
||||
|
||||
@@ -447,6 +448,12 @@ static int parse_reg(const char *str)
|
||||
"mem" return T_MOD_MEM;
|
||||
"rt" return T_MOD_RT;
|
||||
|
||||
"xor" return T_MOD_XOR;
|
||||
"up" return T_MOD_UP;
|
||||
"down" return T_MOD_DOWN;
|
||||
"rup" return T_MOD_RUP;
|
||||
"rdown" return T_MOD_RDOWN;
|
||||
|
||||
"h" return 'h';
|
||||
"=" return '=';
|
||||
"(" return '(';
|
||||
|
||||
@@ -645,6 +645,7 @@ static void print_token(FILE *file, int type, YYSTYPE value)
|
||||
%token <tok> T_OP_GETFIBERID
|
||||
%token <tok> T_OP_STC
|
||||
%token <tok> T_OP_STSC
|
||||
%token <tok> T_OP_SHFL
|
||||
|
||||
/* category 7: */
|
||||
%token <tok> T_OP_BAR
|
||||
@@ -716,6 +717,12 @@ static void print_token(FILE *file, int type, YYSTYPE value)
|
||||
%token <tok> T_MOD_MEM
|
||||
%token <tok> T_MOD_RT
|
||||
|
||||
%token <tok> T_MOD_XOR
|
||||
%token <tok> T_MOD_UP
|
||||
%token <tok> T_MOD_DOWN
|
||||
%token <tok> T_MOD_RUP
|
||||
%token <tok> T_MOD_RDOWN
|
||||
|
||||
%type <num> integer offset uoffset
|
||||
%type <num> flut_immed
|
||||
%type <flt> float
|
||||
@@ -1332,6 +1339,20 @@ cat6_stc:
|
||||
T_OP_STC { new_instr(OPC_STC); } cat6_type 'c' '[' const_dst ']' ',' src_reg ',' cat6_immed
|
||||
| T_OP_STSC { new_instr(OPC_STSC); } cat6_type 'c' '[' const_dst ']' ',' immediate ',' cat6_immed
|
||||
|
||||
/* Mode suffix of a shfl instruction. Each keyword token stores the matching
 * ir3_shfl_mode value in instr->cat6.shfl_mode.
 */
cat6_shfl_mode:    T_MOD_XOR   { instr->cat6.shfl_mode = SHFL_XOR; }
|                  T_MOD_UP    { instr->cat6.shfl_mode = SHFL_UP; }
|                  T_MOD_DOWN  { instr->cat6.shfl_mode = SHFL_DOWN; }
|                  T_MOD_RUP   { instr->cat6.shfl_mode = SHFL_RUP; }
|                  T_MOD_RDOWN { instr->cat6.shfl_mode = SHFL_RDOWN; }
                   /* This is added to make it easy to experiment with the
                    * unknown modes.
                    *
                    * NOTE(review): the parsed integer is stored as-is; no
                    * range check against the 3-bit shfl_mode field is
                    * visible here, so out-of-range values are presumably
                    * truncated on assignment -- confirm.
                    */
|                  integer     { instr->cat6.shfl_mode = $1; }
|
||||
|
||||
/* Subgroup shuffle:
 *
 *    shfl.<mode>.<type> <dst>, <src>, <reg-or-immed>
 *
 * e.g. "shfl.up.u32 r1.z, r0.x, 5". A new OPC_SHFL instruction is created
 * as soon as the opcode token is seen, before the mode, type and operands
 * are parsed.
 */
cat6_shfl:
                   T_OP_SHFL { new_instr(OPC_SHFL); } '.' cat6_shfl_mode cat6_type dst ',' src ',' cat6_reg_or_immed
|
||||
|
||||
|
||||
cat6_todo: T_OP_G2L { new_instr(OPC_G2L); }
|
||||
| T_OP_L2G { new_instr(OPC_L2G); }
|
||||
| T_OP_RESFMT { new_instr(OPC_RESFMT); }
|
||||
@@ -1347,6 +1368,7 @@ cat6_instr: cat6_load
|
||||
| cat6_bindless_ldc
|
||||
| cat6_bindless_ibo
|
||||
| cat6_stc
|
||||
| cat6_shfl
|
||||
| cat6_todo
|
||||
|
||||
cat7_scope: '.' 'w' { instr->cat7.w = true; }
|
||||
|
||||
@@ -437,6 +437,21 @@ static const struct test {
|
||||
/* dEQP-VK.subgroups.quad.graphics.subgroupquadbroadcast_int */
|
||||
INSTR_6XX(c0260001_00c98000, "getfiberid.u32 r0.y"),
|
||||
|
||||
/* dEQP-VK.subgroups.shuffle.compute.subgroupshuffleup_bvec4_constant */
|
||||
INSTR_6XX(c6e4400d_05800002, "shfl.up.u16 hr3.y, hr0.y, 5"),
|
||||
/* dEQP-VK.subgroups.shuffle.compute.subgroupshuffleup_f16vec3 */
|
||||
INSTR_6XX(c6e44017_c0000018, "shfl.up.u16 hr5.w, hr3.x, r48.x"),
|
||||
/* dEQP-VK.subgroups.shuffle.compute.subgroupshuffleup_uvec3_constant */
|
||||
INSTR_6XX(c6e64006_05800000, "shfl.up.u32 r1.z, r0.x, 5"),
|
||||
/* dEQP-VK.subgroups.shuffle.compute.subgroupshuffleup_ivec3_dynamically_uniform */
|
||||
INSTR_6XX(c6e64007_05000004, "shfl.up.u32 r1.w, r0.z, r1.y"),
|
||||
/* dEQP-VK.subgroups.shuffle.graphics.subgroupshuffledown_i8vec3 */
|
||||
INSTR_6XX(c6e46011_c1000014, "shfl.down.u16 hr4.y, hr2.z, r48.y"),
|
||||
/* dEQP-VK.memory_model.write_after_read.ext.u32.coherent.fence_atomic.atomicwrite.subgroup.payload_local.image.guard_local.image.frag */
|
||||
INSTR_6XX(c6e62005_3f800008, "shfl.xor.u32 r1.y, r1.x, 63"),
|
||||
/* dEQP-VK.subgroups.shuffle.graphics.subgroupshuffle_bvec4 */
|
||||
INSTR_6XX(c6e4c012_c0000020, "shfl.rup.u16 hr4.z, hr4.x, r48.x"),
|
||||
|
||||
/* Custom test since we've never seen the blob emit these. */
|
||||
INSTR_6XX(c0260004_00490000, "getspid.u32 r1.x"),
|
||||
INSTR_6XX(c0260005_00494000, "getwid.u32 r1.y"),
|
||||
|
||||
@@ -1322,7 +1322,55 @@ SOFTWARE.
|
||||
<pattern low="52" high="53">11</pattern>
|
||||
</bitset>
|
||||
|
||||
<!--
	Display names for the MODE field of the shfl instruction. Only the
	values listed here have a name; 0, 4 and 5 are left undefined.
-->
<enum name="#cat6-shfl-mode">
	<value val="1" display="xor"/>
	<value val="2" display="up"/>
	<value val="3" display="down"/>
	<value val="6" display="rup"/>
	<value val="7" display="rdown"/>
</enum>
|
||||
|
||||
<bitset name="shfl" extends="#instruction-cat6-a3xx">
	<doc>
		Subgroup shuffle/rotate operations.

		shfl.xor, shfl.up, shfl.down are like Vulkan subgroupShuffleXor,
		subgroupShuffleUp, subgroupShuffleDown except that the mask/delta
		has to be dynamically uniform.

		shfl.rdown is Vulkan subgroupRotate. shfl.rup is like subgroupRotate
		with a negative delta.

		NOTE: SRC2 is always a full register, regardless of TYPE. The blob
		disassembles it as a half register when TYPE is u16 though. We don't
		copy this mistake here.
	</doc>

	<gen min="600"/>

	<!-- DST and SRC1 get the {TYPE_HALF} prefix; SRC2 is printed without it (see NOTE above). -->
	<display>
		{SY}{JP}{NAME}.{MODE}.{TYPE} {TYPE_HALF}{DST}, {TYPE_HALF}{SRC1}, {SRC2}
	</display>

	<pattern low="0" high="0">0</pattern>
	<field low="1" high="8" name="SRC1" type="#reg-gpr"/>
	<pattern low="9" high="22">00000000000000</pattern>
	<!-- SRC2_IM is forwarded to #cat6-src as SRC_IM and selects register vs. immediate for SRC2. -->
	<field low="23" high="23" name="SRC2_IM" type="bool"/>
	<field low="24" high="31" name="SRC2" type="#cat6-src">
		<param name="SRC2_IM" as="SRC_IM"/>
	</field>
	<field low="32" high="39" name="DST" type="#reg-gpr"/>
	<!-- NOTE(review): bits 40 to 44 and bit 48 are 'x' patterns, presumably don't-care bits; confirm. -->
	<pattern low="40" high="44">xxxxx</pattern>
	<field low="45" high="47" name="MODE" type="#cat6-shfl-mode"/>
	<pattern low="48" high="48">x</pattern>
	<!-- NOTE(review): bits 49 to 51 are not described here; presumably TYPE, inherited from #instruction-cat6-a3xx; confirm. -->
	<pattern low="52" high="53">10</pattern>
	<pattern low="54" high="58">11011</pattern>  <!-- OPC -->

	<!-- Encoder side: MODE comes from cat6.shfl_mode, SRC2_IM from the IMMED flag on the second source. -->
	<encode>
		<map name="MODE">src->cat6.shfl_mode</map>
		<map name="SRC2_IM">!!(src->srcs[1]->flags &amp; IR3_REG_IMMED)</map>
	</encode>
</bitset>
|
||||
|
||||
<expr name="#cat6-d">
|
||||
{D_MINUS_ONE} + 1
|
||||
|
||||
Reference in New Issue
Block a user