ir3/isa: add isaspec definition for shfl

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31358>
This commit is contained in:
Job Noorman
2024-09-25 12:04:45 +02:00
committed by Marge Bot
parent fe57b10221
commit fb7b7401a5
6 changed files with 103 additions and 0 deletions
+1
View File
@@ -287,6 +287,7 @@ typedef enum {
OPC_GETSPID = _OPC(6, 36), /* SP ID */
OPC_GETWID = _OPC(6, 37), /* wavefront ID */
OPC_GETFIBERID = _OPC(6, 38), /* fiber ID */
OPC_SHFL = _OPC(6, 39),
/* Logical opcodes for things that differ in a6xx+ */
OPC_STC = _OPC(6, 40),
+10
View File
@@ -260,6 +260,14 @@ typedef enum {
ALIAS_MEM = 4,
} ir3_alias_scope;
/*
 * Mode selector of the cat6 "shfl" (subgroup shuffle/rotate) instruction.
 *
 * The numeric values are the hardware encodings: they are identical to the
 * values of the isaspec "#cat6-shfl-mode" enum and are stored in a 3-bit
 * field. Encodings 0, 4 and 5 have no named mode here.
 *
 * Per the isaspec documentation of shfl, the xor/up/down modes behave like
 * the Vulkan subgroupShuffleXor/Up/Down operations, except that the
 * mask/delta has to be dynamically uniform.
 */
typedef enum {
   /* shfl.xor: like subgroupShuffleXor */
   SHFL_XOR = 1,

   /* shfl.up: like subgroupShuffleUp */
   SHFL_UP = 2,

   /* shfl.down: like subgroupShuffleDown */
   SHFL_DOWN = 3,

   /* shfl.rup: like subgroupRotate with a negative delta */
   SHFL_RUP = 6,

   /* shfl.rdown: subgroupRotate */
   SHFL_RDOWN = 7,
} ir3_shfl_mode;
typedef enum ir3_instruction_flags {
/* (sy) flag is set on first instruction, and after sample
* instructions (probably just on RAW hazard).
@@ -416,6 +424,7 @@ struct ir3_instruction {
unsigned d : 3; /* for ldc, component offset */
bool typed : 1;
unsigned base : 3;
ir3_shfl_mode shfl_mode : 3;
} cat6;
struct {
unsigned w : 1; /* write */
@@ -2956,6 +2965,7 @@ INSTR1(QUAD_SHUFFLE_DIAG)
INSTR2NODST(LDC_K)
INSTR2NODST(STC)
INSTR2NODST(STSC)
INSTR2(SHFL)
#ifndef GPU
#elif GPU >= 600
INSTR4NODST(STIB);
+7
View File
@@ -382,6 +382,7 @@ static int parse_reg(const char *str)
"getfiberid" return TOKEN(T_OP_GETFIBERID);
"stc" return TOKEN(T_OP_STC);
"stsc" return TOKEN(T_OP_STSC);
"shfl" return TOKEN(T_OP_SHFL);
("b16"|"b32"){1} ir3_yylval.str = yytext; return T_INSTR_TYPE;
@@ -447,6 +448,12 @@ static int parse_reg(const char *str)
"mem" return T_MOD_MEM;
"rt" return T_MOD_RT;
"xor" return T_MOD_XOR;
"up" return T_MOD_UP;
"down" return T_MOD_DOWN;
"rup" return T_MOD_RUP;
"rdown" return T_MOD_RDOWN;
"h" return 'h';
"=" return '=';
"(" return '(';
+22
View File
@@ -645,6 +645,7 @@ static void print_token(FILE *file, int type, YYSTYPE value)
%token <tok> T_OP_GETFIBERID
%token <tok> T_OP_STC
%token <tok> T_OP_STSC
%token <tok> T_OP_SHFL
/* category 7: */
%token <tok> T_OP_BAR
@@ -716,6 +717,12 @@ static void print_token(FILE *file, int type, YYSTYPE value)
%token <tok> T_MOD_MEM
%token <tok> T_MOD_RT
%token <tok> T_MOD_XOR
%token <tok> T_MOD_UP
%token <tok> T_MOD_DOWN
%token <tok> T_MOD_RUP
%token <tok> T_MOD_RDOWN
%type <num> integer offset uoffset
%type <num> flut_immed
%type <flt> float
@@ -1332,6 +1339,20 @@ cat6_stc:
T_OP_STC { new_instr(OPC_STC); } cat6_type 'c' '[' const_dst ']' ',' src_reg ',' cat6_immed
| T_OP_STSC { new_instr(OPC_STSC); } cat6_type 'c' '[' const_dst ']' ',' immediate ',' cat6_immed
/* Mode suffix of shfl: either one of the named mode modifiers or a raw
 * integer. Every alternative stores the selected mode in the current
 * instruction's cat6.shfl_mode.
 */
cat6_shfl_mode: T_MOD_XOR { instr->cat6.shfl_mode = SHFL_XOR; }
| T_MOD_UP { instr->cat6.shfl_mode = SHFL_UP; }
| T_MOD_DOWN { instr->cat6.shfl_mode = SHFL_DOWN; }
| T_MOD_RUP { instr->cat6.shfl_mode = SHFL_RUP; }
| T_MOD_RDOWN { instr->cat6.shfl_mode = SHFL_RDOWN; }
/* Raw numeric mode, accepted to make it easy to experiment with the
 * unknown modes, i.e. the encodings that have no named modifier above.
 * NOTE(review): cat6.shfl_mode is a 3-bit field and no range check is
 * done here, so integers outside 0..7 presumably get truncated - confirm.
 */
| integer { instr->cat6.shfl_mode = $1; }
/* shfl.<mode><type> <dst>, <src>, <reg or immed>
 * Intended to accept the syntax used in the disasm tests, e.g.
 * "shfl.up.u32 r1.z, r0.x, 5" or "shfl.up.u32 r1.w, r0.z, r1.y".
 * The instruction is created as OPC_SHFL before the operands are parsed.
 */
cat6_shfl:
T_OP_SHFL { new_instr(OPC_SHFL); } '.' cat6_shfl_mode cat6_type dst ',' src ',' cat6_reg_or_immed
cat6_todo: T_OP_G2L { new_instr(OPC_G2L); }
| T_OP_L2G { new_instr(OPC_L2G); }
| T_OP_RESFMT { new_instr(OPC_RESFMT); }
@@ -1347,6 +1368,7 @@ cat6_instr: cat6_load
| cat6_bindless_ldc
| cat6_bindless_ibo
| cat6_stc
| cat6_shfl
| cat6_todo
cat7_scope: '.' 'w' { instr->cat7.w = true; }
+15
View File
@@ -437,6 +437,21 @@ static const struct test {
/* dEQP-VK.subgroups.quad.graphics.subgroupquadbroadcast_int */
INSTR_6XX(c0260001_00c98000, "getfiberid.u32 r0.y"),
/* dEQP-VK.subgroups.shuffle.compute.subgroupshuffleup_bvec4_constant */
INSTR_6XX(c6e4400d_05800002, "shfl.up.u16 hr3.y, hr0.y, 5"),
/* dEQP-VK.subgroups.shuffle.compute.subgroupshuffleup_f16vec3 */
INSTR_6XX(c6e44017_c0000018, "shfl.up.u16 hr5.w, hr3.x, r48.x"),
/* dEQP-VK.subgroups.shuffle.compute.subgroupshuffleup_uvec3_constant */
INSTR_6XX(c6e64006_05800000, "shfl.up.u32 r1.z, r0.x, 5"),
/* dEQP-VK.subgroups.shuffle.compute.subgroupshuffleup_ivec3_dynamically_uniform */
INSTR_6XX(c6e64007_05000004, "shfl.up.u32 r1.w, r0.z, r1.y"),
/* dEQP-VK.subgroups.shuffle.graphics.subgroupshuffledown_i8vec3 */
INSTR_6XX(c6e46011_c1000014, "shfl.down.u16 hr4.y, hr2.z, r48.y"),
/* dEQP-VK.memory_model.write_after_read.ext.u32.coherent.fence_atomic.atomicwrite.subgroup.payload_local.image.guard_local.image.frag */
INSTR_6XX(c6e62005_3f800008, "shfl.xor.u32 r1.y, r1.x, 63"),
/* dEQP-VK.subgroups.shuffle.graphics.subgroupshuffle_bvec4 */
INSTR_6XX(c6e4c012_c0000020, "shfl.rup.u16 hr4.z, hr4.x, r48.x"),
/* Custom test since we've never seen the blob emit these. */
INSTR_6XX(c0260004_00490000, "getspid.u32 r1.x"),
INSTR_6XX(c0260005_00494000, "getwid.u32 r1.y"),
+48
View File
@@ -1322,7 +1322,55 @@ SOFTWARE.
<pattern low="52" high="53">11</pattern>
</bitset>
<!--
Values of the 3-bit MODE field of the shfl instruction and the names
they are displayed as. Encodings 0, 4 and 5 are not named here.
-->
<enum name="#cat6-shfl-mode">
<value val="1" display="xor"/>
<value val="2" display="up"/>
<value val="3" display="down"/>
<value val="6" display="rup"/>
<value val="7" display="rdown"/>
</enum>
<bitset name="shfl" extends="#instruction-cat6-a3xx">
<doc>
Subgroup shuffle/rotate operations.
shfl.xor, shfl.up, shfl.down are like Vulkan subgroupShuffleXor,
subgroupShuffleUp, subgroupShuffleDown except that the mask/delta
has to be dynamically uniform.
shfl.rdown is Vulkan subgroupRotate. shfl.rup is like subgroupRotate
with a negative delta.
NOTE: SRC2 is always a full register, regardless of TYPE. The blob
disassembles it as a half register when TYPE is u16 though. We don't
copy this mistake here.
</doc>
<gen min="600"/>
<!--
Printed as e.g. "shfl.up.u32 r1.z, r0.x, 5". SRC2 is displayed without
the TYPE_HALF prefix, see the NOTE in the doc above. SY, JP, TYPE and
TYPE_HALF are not defined in this bitset; presumably they are inherited
from the parent bitset (to be confirmed there).
-->
<display>
{SY}{JP}{NAME}.{MODE}.{TYPE} {TYPE_HALF}{DST}, {TYPE_HALF}{SRC1}, {SRC2}
</display>
<pattern low="0" high="0">0</pattern>
<!-- SRC1: the value to shuffle, always a GPR. -->
<field low="1" high="8" name="SRC1" type="#reg-gpr"/>
<pattern low="9" high="22">00000000000000</pattern>
<!--
SRC2: the mask/delta operand. SRC2_IM is handed to #cat6-src as its
SRC_IM parameter and is set by the encoder when the second source is
an immediate.
-->
<field low="23" high="23" name="SRC2_IM" type="bool"/>
<field low="24" high="31" name="SRC2" type="#cat6-src">
<param name="SRC2_IM" as="SRC_IM"/>
</field>
<field low="32" high="39" name="DST" type="#reg-gpr"/>
<pattern low="40" high="44">xxxxx</pattern>
<!-- MODE: 3-bit shuffle mode, see #cat6-shfl-mode for the named values. -->
<field low="45" high="47" name="MODE" type="#cat6-shfl-mode"/>
<pattern low="48" high="48">x</pattern>
<pattern low="52" high="53">10</pattern>
<pattern low="54" high="58">11011</pattern> <!-- OPC -->
<!--
Encoder mappings: MODE comes from the instruction's cat6.shfl_mode and
SRC2_IM from the IR3_REG_IMMED flag of the second source (srcs[1]).
-->
<encode>
<map name="MODE">src->cat6.shfl_mode</map>
<map name="SRC2_IM">!!(src->srcs[1]->flags &amp; IR3_REG_IMMED)</map>
</encode>
</bitset>
<expr name="#cat6-d">
{D_MINUS_ONE} + 1