tu, freedreno/a6xx: Fix setting PC_XS_OUT_CNTL::PRIMITIVE_ID

This bit is supposed to be set when a given stage needs the PrimID sysval
preloaded, except for the VS which doesn't have this bit and instead
infers it from the HS or GS bit (depending on whether tess/GS is
enabled). Therefore for HS, GS, and DS we should set it whenever the
corresponding sysval is there. This includes adding a missing
PC_HS_OUT_CNTL, which I confirmed is set when the HS reads PrimID from
the VS. Note that the DS sysval is currently always enabled whenever
there's a GS; if we were to fix that, then we should also change the
logic here.

This doesn't fix anything that I know of, but aligns us more with what
the blob does.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12166>
This commit is contained in:
Connor Abbott
2021-08-02 11:56:15 +02:00
committed by Marge Bot
parent 8115cde3ba
commit 2e2e6865b4
5 changed files with 51 additions and 26 deletions
+4 -4
View File
@@ -6335,7 +6335,7 @@ clusters:
00000002 PC_PRIMITIVE_CNTL_0: { PROVOKING_VTX_LAST }
00000001 PC_VS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
00000001 PC_GS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
00000000 PC_PRIMITIVE_CNTL_3: 0
00000000 PC_HS_OUT_CNTL: { STRIDE_IN_VPC = 0 | CLIP_MASK = 0 }
00000001 PC_DS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
00000000 PC_PRIMITIVE_CNTL_5: { GS_VERTICES_OUT = 0 | GS_INVOCATIONS = 0 | GS_OUTPUT = TESS_POINTS }
00000000 PC_PRIMITIVE_CNTL_6: { STRIDE_IN_VPC = 0 }
@@ -6595,7 +6595,7 @@ clusters:
00000002 PC_PRIMITIVE_CNTL_0: { PROVOKING_VTX_LAST }
00000001 PC_VS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
00000001 PC_GS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
00000000 PC_PRIMITIVE_CNTL_3: 0
00000000 PC_HS_OUT_CNTL: { STRIDE_IN_VPC = 0 | CLIP_MASK = 0 }
00000001 PC_DS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
00000000 PC_PRIMITIVE_CNTL_5: { GS_VERTICES_OUT = 0 | GS_INVOCATIONS = 0 | GS_OUTPUT = TESS_POINTS }
00000000 PC_PRIMITIVE_CNTL_6: { STRIDE_IN_VPC = 0 }
@@ -6860,7 +6860,7 @@ clusters:
00000002 PC_PRIMITIVE_CNTL_0: { PROVOKING_VTX_LAST }
00000001 PC_VS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
00000001 PC_GS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
00000000 PC_PRIMITIVE_CNTL_3: 0
00000000 PC_HS_OUT_CNTL: { STRIDE_IN_VPC = 0 | CLIP_MASK = 0 }
00000001 PC_DS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
00000000 PC_PRIMITIVE_CNTL_5: { GS_VERTICES_OUT = 0 | GS_INVOCATIONS = 0 | GS_OUTPUT = TESS_POINTS }
00000000 PC_PRIMITIVE_CNTL_6: { STRIDE_IN_VPC = 0 }
@@ -6887,7 +6887,7 @@ clusters:
00000002 PC_PRIMITIVE_CNTL_0: { PROVOKING_VTX_LAST }
00000001 PC_VS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
00000001 PC_GS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
00000000 PC_PRIMITIVE_CNTL_3: 0
00000000 PC_HS_OUT_CNTL: { STRIDE_IN_VPC = 0 | CLIP_MASK = 0 }
00000001 PC_DS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
00000000 PC_PRIMITIVE_CNTL_5: { GS_VERTICES_OUT = 0 | GS_INVOCATIONS = 0 | GS_OUTPUT = TESS_POINTS }
00000000 PC_PRIMITIVE_CNTL_6: { STRIDE_IN_VPC = 0 }
@@ -679,8 +679,8 @@ t4 write VPC_CNTL_0 (9304)
t4 write PC_VS_OUT_CNTL (9b01)
PC_VS_OUT_CNTL: { STRIDE_IN_VPC = 4 | CLIP_MASK = 0 }
0000000001121090: 0000: 489b0101 00000004
t4 write PC_PRIMITIVE_CNTL_3 (9b03)
PC_PRIMITIVE_CNTL_3: 0
t4 write PC_HS_OUT_CNTL (9b03)
PC_HS_OUT_CNTL: { STRIDE_IN_VPC = 0 | CLIP_MASK = 0 }
0000000001121098: 0000: 409b0301 00000000
t4 write HLSQ_CONTROL_1_REG (b982)
HLSQ_CONTROL_1_REG: 0x7
@@ -1052,7 +1052,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
+ 00000000 0x9990: 00000000
!+ 00000002 PC_PRIMITIVE_CNTL_0: { PROVOKING_VTX_LAST }
!+ 00000004 PC_VS_OUT_CNTL: { STRIDE_IN_VPC = 4 | CLIP_MASK = 0 }
+ 00000000 PC_PRIMITIVE_CNTL_3: 0
+ 00000000 PC_HS_OUT_CNTL: { STRIDE_IN_VPC = 0 | CLIP_MASK = 0 }
+ 00000000 PC_PRIMITIVE_CNTL_6: { STRIDE_IN_VPC = 0 }
+ 00000000 PC_MULTIVIEW_CNTL: { VIEWS = 0 }
+ 00000000 PC_UNKNOWN_9E72: 0
@@ -1973,8 +1973,8 @@ t4 write VPC_CNTL_0 (9304)
t4 write PC_VS_OUT_CNTL (9b01)
PC_VS_OUT_CNTL: { STRIDE_IN_VPC = 4 | CLIP_MASK = 0 }
0000000001120090: 0000: 489b0101 00000004
t4 write PC_PRIMITIVE_CNTL_3 (9b03)
PC_PRIMITIVE_CNTL_3: 0
t4 write PC_HS_OUT_CNTL (9b03)
PC_HS_OUT_CNTL: { STRIDE_IN_VPC = 0 | CLIP_MASK = 0 }
0000000001120098: 0000: 409b0301 00000000
t4 write HLSQ_CONTROL_1_REG (b982)
HLSQ_CONTROL_1_REG: 0x7
@@ -5257,7 +5257,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
+ ffffffff PC_RESTART_INDEX: 4294967295
+ 00000002 PC_PRIMITIVE_CNTL_0: { PROVOKING_VTX_LAST }
+ 00000004 PC_VS_OUT_CNTL: { STRIDE_IN_VPC = 4 | CLIP_MASK = 0 }
+ 00000000 PC_PRIMITIVE_CNTL_3: 0
+ 00000000 PC_HS_OUT_CNTL: { STRIDE_IN_VPC = 0 | CLIP_MASK = 0 }
+ 00000000 PC_PRIMITIVE_CNTL_6: { STRIDE_IN_VPC = 0 }
+ 00000101 VFD_CONTROL_0: { FETCH_CNT = 1 | DECODE_CNT = 1 }
+ fcfcfcfc VFD_CONTROL_1: { REGID4VTX = r63.x | REGID4INST = r63.x | REGID4PRIMID = r63.x | REGID4VIEWID = r63.x }
+2 -1
View File
@@ -2539,7 +2539,8 @@ to upconvert to 32b float internally?
<reg32 offset="0x9b01" name="PC_VS_OUT_CNTL" type="a6xx_xs_out_cntl"/>
<reg32 offset="0x9b02" name="PC_GS_OUT_CNTL" type="a6xx_xs_out_cntl"/>
<reg32 offset="0x9b03" name="PC_PRIMITIVE_CNTL_3" pos="11"/>
<!-- since HS can't output anything, only PRIMITIVE_ID is valid -->
<reg32 offset="0x9b03" name="PC_HS_OUT_CNTL" type="a6xx_xs_out_cntl"/>
<reg32 offset="0x9b04" name="PC_DS_OUT_CNTL" type="a6xx_xs_out_cntl"/>
<reg32 offset="0x9b05" name="PC_PRIMITIVE_CNTL_5">
+33 -12
View File
@@ -881,6 +881,17 @@ tu6_emit_vpc(struct tu_cs *cs,
REG_A6XX_VPC_VS_LAYER_CNTL,
REG_A6XX_GRAS_VS_LAYER_CNTL
},
[MESA_SHADER_TESS_CTRL] = {
0,
0,
0,
0,
0,
REG_A6XX_PC_HS_OUT_CNTL,
0,
0,
0
},
[MESA_SHADER_TESS_EVAL] = {
REG_A6XX_SP_DS_OUT_REG(0),
REG_A6XX_SP_DS_VPC_DST_REG(0),
@@ -950,8 +961,6 @@ tu6_emit_vpc(struct tu_cs *cs,
ir3_find_output_regid(last_shader, VARYING_SLOT_CLIP_DIST0);
const uint32_t clip1_regid =
ir3_find_output_regid(last_shader, VARYING_SLOT_CLIP_DIST1);
uint32_t primitive_regid = gs ?
ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID) : regid(63, 0);
uint32_t flags_regid = gs ?
ir3_find_output_regid(gs, VARYING_SLOT_GS_VERTEX_FLAGS_IR3) : 0;
@@ -1044,13 +1053,28 @@ tu6_emit_vpc(struct tu_cs *cs,
tu_cs_emit(cs, A6XX_GRAS_VS_CL_CNTL_CLIP_MASK(last_shader->clip_mask) |
A6XX_GRAS_VS_CL_CNTL_CULL_MASK(last_shader->cull_mask));
tu_cs_emit_pkt4(cs, cfg->reg_pc_xs_out_cntl, 1);
tu_cs_emit(cs, A6XX_PC_VS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) |
CONDREG(pointsize_regid, A6XX_PC_VS_OUT_CNTL_PSIZE) |
CONDREG(layer_regid, A6XX_PC_VS_OUT_CNTL_LAYER) |
CONDREG(view_regid, A6XX_PC_VS_OUT_CNTL_VIEW) |
CONDREG(primitive_regid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID) |
A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
const struct ir3_shader_variant *geom_shaders[] = { vs, hs, ds, gs };
for (unsigned i = 0; i < ARRAY_SIZE(geom_shaders); i++) {
const struct ir3_shader_variant *shader = geom_shaders[i];
if (!shader)
continue;
bool primid = shader->type != MESA_SHADER_VERTEX &&
VALIDREG(ir3_find_sysval_regid(shader, SYSTEM_VALUE_PRIMITIVE_ID));
tu_cs_emit_pkt4(cs, reg_config[shader->type].reg_pc_xs_out_cntl, 1);
if (shader == last_shader) {
tu_cs_emit(cs, A6XX_PC_VS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) |
CONDREG(pointsize_regid, A6XX_PC_VS_OUT_CNTL_PSIZE) |
CONDREG(layer_regid, A6XX_PC_VS_OUT_CNTL_LAYER) |
CONDREG(view_regid, A6XX_PC_VS_OUT_CNTL_VIEW) |
COND(primid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID) |
A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
} else {
tu_cs_emit(cs, COND(primid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID));
}
}
tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_primitive_cntl, 1);
tu_cs_emit(cs, A6XX_SP_VS_PRIMITIVE_CNTL_OUT(linkage.cnt) |
@@ -1170,9 +1194,6 @@ tu6_emit_vpc(struct tu_cs *cs,
A6XX_PC_PRIMITIVE_CNTL_5_GS_OUTPUT(output) |
A6XX_PC_PRIMITIVE_CNTL_5_GS_INVOCATIONS(invocations));
tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_3, 1);
tu_cs_emit(cs, 0);
tu_cs_emit_pkt4(cs, REG_A6XX_VPC_UNKNOWN_9100, 1);
tu_cs_emit(cs, 0xff);
@@ -360,9 +360,12 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
else
vs_primitive_regid = regid(63, 0);
bool hs_reads_primid = false, ds_reads_primid = false;
if (hs) {
tess_coord_x_regid = ir3_find_sysval_regid(ds, SYSTEM_VALUE_TESS_COORD);
tess_coord_y_regid = next_regid(tess_coord_x_regid, 1);
hs_reads_primid = VALIDREG(ir3_find_sysval_regid(hs, SYSTEM_VALUE_PRIMITIVE_ID));
ds_reads_primid = VALIDREG(ir3_find_sysval_regid(ds, SYSTEM_VALUE_PRIMITIVE_ID));
hs_rel_patch_regid = ir3_find_sysval_regid(hs, SYSTEM_VALUE_REL_PATCH_ID_IR3);
ds_rel_patch_regid = ir3_find_sysval_regid(ds, SYSTEM_VALUE_REL_PATCH_ID_IR3);
ds_primitive_regid = ir3_find_sysval_regid(ds, SYSTEM_VALUE_PRIMITIVE_ID);
@@ -696,8 +699,11 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
OUT_PKT4(ring, REG_A6XX_PC_DS_OUT_CNTL, 1);
OUT_RING(ring, A6XX_PC_DS_OUT_CNTL_STRIDE_IN_VPC(l.max_loc) |
CONDREG(psize_regid, A6XX_PC_DS_OUT_CNTL_PSIZE) |
COND(ds_reads_primid, A6XX_PC_DS_OUT_CNTL_PRIMITIVE_ID) |
A6XX_PC_DS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
OUT_PKT4(ring, REG_A6XX_PC_HS_OUT_CNTL, 1);
OUT_RING(ring, COND(hs_reads_primid, A6XX_PC_HS_OUT_CNTL_PRIMITIVE_ID));
} else {
OUT_PKT4(ring, REG_A6XX_SP_HS_WAVE_INPUT_SIZE, 1);
OUT_RING(ring, 0);
@@ -720,9 +726,6 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
CONDREG(layer_regid, A6XX_PC_VS_OUT_CNTL_LAYER) |
A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_3, 1);
OUT_RING(ring, 0);
OUT_PKT4(ring, REG_A6XX_HLSQ_CONTROL_1_REG, 5);
OUT_RING(ring, 0x7); /* XXX */
OUT_RING(ring, A6XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) |