tu, freedreno/a6xx: Fix setting PC_XS_OUT_CNTL::PRIMITIVE_ID
This is supposed to be set when that stage needs the PrimID sysval preloaded, except for the VS, which doesn't have this bit and instead infers it from the HS or GS bit (depending on whether tess/GS is enabled). Therefore, for HS, GS, and DS we should set it whenever the corresponding sysval is there. This includes adding a missing PC_HS_OUT_CNTL, which I confirmed is set when the HS reads PrimID from the VS. Note that the DS sysval is currently always enabled whenever there's a GS; if we were to fix that, we should also change the logic here. This doesn't fix anything that I know of, but it aligns us more closely with what the blob does. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12166>
This commit is contained in:
@@ -6335,7 +6335,7 @@ clusters:
|
||||
00000002 PC_PRIMITIVE_CNTL_0: { PROVOKING_VTX_LAST }
|
||||
00000001 PC_VS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
|
||||
00000001 PC_GS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
|
||||
00000000 PC_PRIMITIVE_CNTL_3: 0
|
||||
00000000 PC_HS_OUT_CNTL: { STRIDE_IN_VPC = 0 | CLIP_MASK = 0 }
|
||||
00000001 PC_DS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
|
||||
00000000 PC_PRIMITIVE_CNTL_5: { GS_VERTICES_OUT = 0 | GS_INVOCATIONS = 0 | GS_OUTPUT = TESS_POINTS }
|
||||
00000000 PC_PRIMITIVE_CNTL_6: { STRIDE_IN_VPC = 0 }
|
||||
@@ -6595,7 +6595,7 @@ clusters:
|
||||
00000002 PC_PRIMITIVE_CNTL_0: { PROVOKING_VTX_LAST }
|
||||
00000001 PC_VS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
|
||||
00000001 PC_GS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
|
||||
00000000 PC_PRIMITIVE_CNTL_3: 0
|
||||
00000000 PC_HS_OUT_CNTL: { STRIDE_IN_VPC = 0 | CLIP_MASK = 0 }
|
||||
00000001 PC_DS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
|
||||
00000000 PC_PRIMITIVE_CNTL_5: { GS_VERTICES_OUT = 0 | GS_INVOCATIONS = 0 | GS_OUTPUT = TESS_POINTS }
|
||||
00000000 PC_PRIMITIVE_CNTL_6: { STRIDE_IN_VPC = 0 }
|
||||
@@ -6860,7 +6860,7 @@ clusters:
|
||||
00000002 PC_PRIMITIVE_CNTL_0: { PROVOKING_VTX_LAST }
|
||||
00000001 PC_VS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
|
||||
00000001 PC_GS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
|
||||
00000000 PC_PRIMITIVE_CNTL_3: 0
|
||||
00000000 PC_HS_OUT_CNTL: { STRIDE_IN_VPC = 0 | CLIP_MASK = 0 }
|
||||
00000001 PC_DS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
|
||||
00000000 PC_PRIMITIVE_CNTL_5: { GS_VERTICES_OUT = 0 | GS_INVOCATIONS = 0 | GS_OUTPUT = TESS_POINTS }
|
||||
00000000 PC_PRIMITIVE_CNTL_6: { STRIDE_IN_VPC = 0 }
|
||||
@@ -6887,7 +6887,7 @@ clusters:
|
||||
00000002 PC_PRIMITIVE_CNTL_0: { PROVOKING_VTX_LAST }
|
||||
00000001 PC_VS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
|
||||
00000001 PC_GS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
|
||||
00000000 PC_PRIMITIVE_CNTL_3: 0
|
||||
00000000 PC_HS_OUT_CNTL: { STRIDE_IN_VPC = 0 | CLIP_MASK = 0 }
|
||||
00000001 PC_DS_OUT_CNTL: { STRIDE_IN_VPC = 1 | CLIP_MASK = 0 }
|
||||
00000000 PC_PRIMITIVE_CNTL_5: { GS_VERTICES_OUT = 0 | GS_INVOCATIONS = 0 | GS_OUTPUT = TESS_POINTS }
|
||||
00000000 PC_PRIMITIVE_CNTL_6: { STRIDE_IN_VPC = 0 }
|
||||
|
||||
@@ -679,8 +679,8 @@ t4 write VPC_CNTL_0 (9304)
|
||||
t4 write PC_VS_OUT_CNTL (9b01)
|
||||
PC_VS_OUT_CNTL: { STRIDE_IN_VPC = 4 | CLIP_MASK = 0 }
|
||||
0000000001121090: 0000: 489b0101 00000004
|
||||
t4 write PC_PRIMITIVE_CNTL_3 (9b03)
|
||||
PC_PRIMITIVE_CNTL_3: 0
|
||||
t4 write PC_HS_OUT_CNTL (9b03)
|
||||
PC_HS_OUT_CNTL: { STRIDE_IN_VPC = 0 | CLIP_MASK = 0 }
|
||||
0000000001121098: 0000: 409b0301 00000000
|
||||
t4 write HLSQ_CONTROL_1_REG (b982)
|
||||
HLSQ_CONTROL_1_REG: 0x7
|
||||
@@ -1052,7 +1052,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
|
||||
+ 00000000 0x9990: 00000000
|
||||
!+ 00000002 PC_PRIMITIVE_CNTL_0: { PROVOKING_VTX_LAST }
|
||||
!+ 00000004 PC_VS_OUT_CNTL: { STRIDE_IN_VPC = 4 | CLIP_MASK = 0 }
|
||||
+ 00000000 PC_PRIMITIVE_CNTL_3: 0
|
||||
+ 00000000 PC_HS_OUT_CNTL: { STRIDE_IN_VPC = 0 | CLIP_MASK = 0 }
|
||||
+ 00000000 PC_PRIMITIVE_CNTL_6: { STRIDE_IN_VPC = 0 }
|
||||
+ 00000000 PC_MULTIVIEW_CNTL: { VIEWS = 0 }
|
||||
+ 00000000 PC_UNKNOWN_9E72: 0
|
||||
@@ -1973,8 +1973,8 @@ t4 write VPC_CNTL_0 (9304)
|
||||
t4 write PC_VS_OUT_CNTL (9b01)
|
||||
PC_VS_OUT_CNTL: { STRIDE_IN_VPC = 4 | CLIP_MASK = 0 }
|
||||
0000000001120090: 0000: 489b0101 00000004
|
||||
t4 write PC_PRIMITIVE_CNTL_3 (9b03)
|
||||
PC_PRIMITIVE_CNTL_3: 0
|
||||
t4 write PC_HS_OUT_CNTL (9b03)
|
||||
PC_HS_OUT_CNTL: { STRIDE_IN_VPC = 0 | CLIP_MASK = 0 }
|
||||
0000000001120098: 0000: 409b0301 00000000
|
||||
t4 write HLSQ_CONTROL_1_REG (b982)
|
||||
HLSQ_CONTROL_1_REG: 0x7
|
||||
@@ -5257,7 +5257,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
|
||||
+ ffffffff PC_RESTART_INDEX: 4294967295
|
||||
+ 00000002 PC_PRIMITIVE_CNTL_0: { PROVOKING_VTX_LAST }
|
||||
+ 00000004 PC_VS_OUT_CNTL: { STRIDE_IN_VPC = 4 | CLIP_MASK = 0 }
|
||||
+ 00000000 PC_PRIMITIVE_CNTL_3: 0
|
||||
+ 00000000 PC_HS_OUT_CNTL: { STRIDE_IN_VPC = 0 | CLIP_MASK = 0 }
|
||||
+ 00000000 PC_PRIMITIVE_CNTL_6: { STRIDE_IN_VPC = 0 }
|
||||
+ 00000101 VFD_CONTROL_0: { FETCH_CNT = 1 | DECODE_CNT = 1 }
|
||||
+ fcfcfcfc VFD_CONTROL_1: { REGID4VTX = r63.x | REGID4INST = r63.x | REGID4PRIMID = r63.x | REGID4VIEWID = r63.x }
|
||||
|
||||
@@ -2539,7 +2539,8 @@ to upconvert to 32b float internally?
|
||||
|
||||
<reg32 offset="0x9b01" name="PC_VS_OUT_CNTL" type="a6xx_xs_out_cntl"/>
|
||||
<reg32 offset="0x9b02" name="PC_GS_OUT_CNTL" type="a6xx_xs_out_cntl"/>
|
||||
<reg32 offset="0x9b03" name="PC_PRIMITIVE_CNTL_3" pos="11"/>
|
||||
<!-- since HS can't output anything, only PRIMITIVE_ID is valid -->
|
||||
<reg32 offset="0x9b03" name="PC_HS_OUT_CNTL" type="a6xx_xs_out_cntl"/>
|
||||
<reg32 offset="0x9b04" name="PC_DS_OUT_CNTL" type="a6xx_xs_out_cntl"/>
|
||||
|
||||
<reg32 offset="0x9b05" name="PC_PRIMITIVE_CNTL_5">
|
||||
|
||||
@@ -881,6 +881,17 @@ tu6_emit_vpc(struct tu_cs *cs,
|
||||
REG_A6XX_VPC_VS_LAYER_CNTL,
|
||||
REG_A6XX_GRAS_VS_LAYER_CNTL
|
||||
},
|
||||
[MESA_SHADER_TESS_CTRL] = {
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
REG_A6XX_PC_HS_OUT_CNTL,
|
||||
0,
|
||||
0,
|
||||
0
|
||||
},
|
||||
[MESA_SHADER_TESS_EVAL] = {
|
||||
REG_A6XX_SP_DS_OUT_REG(0),
|
||||
REG_A6XX_SP_DS_VPC_DST_REG(0),
|
||||
@@ -950,8 +961,6 @@ tu6_emit_vpc(struct tu_cs *cs,
|
||||
ir3_find_output_regid(last_shader, VARYING_SLOT_CLIP_DIST0);
|
||||
const uint32_t clip1_regid =
|
||||
ir3_find_output_regid(last_shader, VARYING_SLOT_CLIP_DIST1);
|
||||
uint32_t primitive_regid = gs ?
|
||||
ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID) : regid(63, 0);
|
||||
uint32_t flags_regid = gs ?
|
||||
ir3_find_output_regid(gs, VARYING_SLOT_GS_VERTEX_FLAGS_IR3) : 0;
|
||||
|
||||
@@ -1044,13 +1053,28 @@ tu6_emit_vpc(struct tu_cs *cs,
|
||||
tu_cs_emit(cs, A6XX_GRAS_VS_CL_CNTL_CLIP_MASK(last_shader->clip_mask) |
|
||||
A6XX_GRAS_VS_CL_CNTL_CULL_MASK(last_shader->cull_mask));
|
||||
|
||||
tu_cs_emit_pkt4(cs, cfg->reg_pc_xs_out_cntl, 1);
|
||||
tu_cs_emit(cs, A6XX_PC_VS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) |
|
||||
CONDREG(pointsize_regid, A6XX_PC_VS_OUT_CNTL_PSIZE) |
|
||||
CONDREG(layer_regid, A6XX_PC_VS_OUT_CNTL_LAYER) |
|
||||
CONDREG(view_regid, A6XX_PC_VS_OUT_CNTL_VIEW) |
|
||||
CONDREG(primitive_regid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID) |
|
||||
A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
|
||||
const struct ir3_shader_variant *geom_shaders[] = { vs, hs, ds, gs };
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(geom_shaders); i++) {
|
||||
const struct ir3_shader_variant *shader = geom_shaders[i];
|
||||
if (!shader)
|
||||
continue;
|
||||
|
||||
bool primid = shader->type != MESA_SHADER_VERTEX &&
|
||||
VALIDREG(ir3_find_sysval_regid(shader, SYSTEM_VALUE_PRIMITIVE_ID));
|
||||
|
||||
tu_cs_emit_pkt4(cs, reg_config[shader->type].reg_pc_xs_out_cntl, 1);
|
||||
if (shader == last_shader) {
|
||||
tu_cs_emit(cs, A6XX_PC_VS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) |
|
||||
CONDREG(pointsize_regid, A6XX_PC_VS_OUT_CNTL_PSIZE) |
|
||||
CONDREG(layer_regid, A6XX_PC_VS_OUT_CNTL_LAYER) |
|
||||
CONDREG(view_regid, A6XX_PC_VS_OUT_CNTL_VIEW) |
|
||||
COND(primid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID) |
|
||||
A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
|
||||
} else {
|
||||
tu_cs_emit(cs, COND(primid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID));
|
||||
}
|
||||
}
|
||||
|
||||
tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_primitive_cntl, 1);
|
||||
tu_cs_emit(cs, A6XX_SP_VS_PRIMITIVE_CNTL_OUT(linkage.cnt) |
|
||||
@@ -1170,9 +1194,6 @@ tu6_emit_vpc(struct tu_cs *cs,
|
||||
A6XX_PC_PRIMITIVE_CNTL_5_GS_OUTPUT(output) |
|
||||
A6XX_PC_PRIMITIVE_CNTL_5_GS_INVOCATIONS(invocations));
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_3, 1);
|
||||
tu_cs_emit(cs, 0);
|
||||
|
||||
tu_cs_emit_pkt4(cs, REG_A6XX_VPC_UNKNOWN_9100, 1);
|
||||
tu_cs_emit(cs, 0xff);
|
||||
|
||||
|
||||
@@ -360,9 +360,12 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
|
||||
else
|
||||
vs_primitive_regid = regid(63, 0);
|
||||
|
||||
bool hs_reads_primid = false, ds_reads_primid = false;
|
||||
if (hs) {
|
||||
tess_coord_x_regid = ir3_find_sysval_regid(ds, SYSTEM_VALUE_TESS_COORD);
|
||||
tess_coord_y_regid = next_regid(tess_coord_x_regid, 1);
|
||||
hs_reads_primid = VALIDREG(ir3_find_sysval_regid(hs, SYSTEM_VALUE_PRIMITIVE_ID));
|
||||
ds_reads_primid = VALIDREG(ir3_find_sysval_regid(ds, SYSTEM_VALUE_PRIMITIVE_ID));
|
||||
hs_rel_patch_regid = ir3_find_sysval_regid(hs, SYSTEM_VALUE_REL_PATCH_ID_IR3);
|
||||
ds_rel_patch_regid = ir3_find_sysval_regid(ds, SYSTEM_VALUE_REL_PATCH_ID_IR3);
|
||||
ds_primitive_regid = ir3_find_sysval_regid(ds, SYSTEM_VALUE_PRIMITIVE_ID);
|
||||
@@ -696,8 +699,11 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
|
||||
OUT_PKT4(ring, REG_A6XX_PC_DS_OUT_CNTL, 1);
|
||||
OUT_RING(ring, A6XX_PC_DS_OUT_CNTL_STRIDE_IN_VPC(l.max_loc) |
|
||||
CONDREG(psize_regid, A6XX_PC_DS_OUT_CNTL_PSIZE) |
|
||||
COND(ds_reads_primid, A6XX_PC_DS_OUT_CNTL_PRIMITIVE_ID) |
|
||||
A6XX_PC_DS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_PC_HS_OUT_CNTL, 1);
|
||||
OUT_RING(ring, COND(hs_reads_primid, A6XX_PC_HS_OUT_CNTL_PRIMITIVE_ID));
|
||||
} else {
|
||||
OUT_PKT4(ring, REG_A6XX_SP_HS_WAVE_INPUT_SIZE, 1);
|
||||
OUT_RING(ring, 0);
|
||||
@@ -720,9 +726,6 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx,
|
||||
CONDREG(layer_regid, A6XX_PC_VS_OUT_CNTL_LAYER) |
|
||||
A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_3, 1);
|
||||
OUT_RING(ring, 0);
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_HLSQ_CONTROL_1_REG, 5);
|
||||
OUT_RING(ring, 0x7); /* XXX */
|
||||
OUT_RING(ring, A6XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) |
|
||||
|
||||
Reference in New Issue
Block a user