turnip: clean up primitive output state

We only need to emit one set of primitive output registers. This may differ
from the blob, because it seems to try to allow using the same pipeline
with tess/geom enabled/disabled.

Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5790>
This commit is contained in:
Jonathan Marek
2020-07-07 10:37:40 -04:00
committed by Marge Bot
parent 7748afbb1e
commit a1a80c38ea
3 changed files with 85 additions and 94 deletions
+1 -2
View File
@@ -524,8 +524,7 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_
.persp_division_disable = 1,
.vp_xform_disable = 1,
.vp_clip_code_ignore = 1,
.clip_disable = 1),
A6XX_GRAS_VS_CL_CNTL(0));
.clip_disable = 1));
tu_cs_emit_regs(cs, A6XX_GRAS_SU_CNTL()); // XXX msaa enable?
tu_cs_emit_regs(cs,
-3
View File
@@ -746,7 +746,6 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_A982, 0);
tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_A9A8, 0);
tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AB00, 0x5);
tu_cs_emit_write_reg(cs, REG_A6XX_VPC_VS_LAYER_CNTL, 0x0000ffff);
/* TODO: set A6XX_VFD_ADD_OFFSET_INSTANCE and fix ir3 to avoid adding base instance */
tu_cs_emit_write_reg(cs, REG_A6XX_VFD_ADD_OFFSET, A6XX_VFD_ADD_OFFSET_VERTEX);
@@ -769,7 +768,6 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881E, 0);
tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_88F0, 0);
tu_cs_emit_write_reg(cs, REG_A6XX_VPC_VS_CLIP_CNTL, 0xffff00);
tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9107, 0);
tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9236,
@@ -791,7 +789,6 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B183, 0);
tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8099, 0);
tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_VS_LAYER_CNTL, 0);
tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A0, 2);
tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80AF, 0);
tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9210, 0);
+84 -89
View File
@@ -732,6 +732,53 @@ tu6_emit_vpc(struct tu_cs *cs,
const struct ir3_shader_variant *gs,
const struct ir3_shader_variant *fs)
{
/* note: doesn't compile as static because of the array regs.. */
const struct reg_config {
uint16_t reg_sp_xs_out_reg;
uint16_t reg_sp_xs_vpc_dst_reg;
uint16_t reg_vpc_xs_pack;
uint16_t reg_vpc_xs_clip_cntl;
uint16_t reg_gras_xs_cl_cntl;
uint16_t reg_pc_xs_out_cntl;
uint16_t reg_sp_xs_primitive_cntl;
uint16_t reg_vpc_xs_layer_cntl;
uint16_t reg_gras_xs_layer_cntl;
} reg_config[] = {
[MESA_SHADER_VERTEX] = {
REG_A6XX_SP_VS_OUT_REG(0),
REG_A6XX_SP_VS_VPC_DST_REG(0),
REG_A6XX_VPC_VS_PACK,
REG_A6XX_VPC_VS_CLIP_CNTL,
REG_A6XX_GRAS_VS_CL_CNTL,
REG_A6XX_PC_VS_OUT_CNTL,
REG_A6XX_SP_VS_PRIMITIVE_CNTL,
REG_A6XX_VPC_VS_LAYER_CNTL,
REG_A6XX_GRAS_VS_LAYER_CNTL
},
[MESA_SHADER_TESS_EVAL] = {
REG_A6XX_SP_DS_OUT_REG(0),
REG_A6XX_SP_DS_VPC_DST_REG(0),
REG_A6XX_VPC_DS_PACK,
REG_A6XX_VPC_DS_CLIP_CNTL,
REG_A6XX_GRAS_DS_CL_CNTL,
REG_A6XX_PC_DS_OUT_CNTL,
REG_A6XX_SP_DS_PRIMITIVE_CNTL,
REG_A6XX_VPC_DS_LAYER_CNTL,
REG_A6XX_GRAS_DS_LAYER_CNTL
},
[MESA_SHADER_GEOMETRY] = {
REG_A6XX_SP_GS_OUT_REG(0),
REG_A6XX_SP_GS_VPC_DST_REG(0),
REG_A6XX_VPC_GS_PACK,
REG_A6XX_VPC_GS_CLIP_CNTL,
REG_A6XX_GRAS_GS_CL_CNTL,
REG_A6XX_PC_GS_OUT_CNTL,
REG_A6XX_SP_GS_PRIMITIVE_CNTL,
REG_A6XX_VPC_GS_LAYER_CNTL,
REG_A6XX_GRAS_GS_LAYER_CNTL
},
};
const struct ir3_shader_variant *last_shader;
if (gs) {
last_shader = gs;
@@ -740,6 +787,9 @@ tu6_emit_vpc(struct tu_cs *cs,
} else {
last_shader = vs;
}
const struct reg_config *cfg = &reg_config[last_shader->type];
struct ir3_shader_linkage linkage = { .primid_loc = 0xff };
if (fs)
ir3_link_shaders(&linkage, last_shader, fs, true);
@@ -766,6 +816,10 @@ tu6_emit_vpc(struct tu_cs *cs,
ir3_find_output_regid(last_shader, VARYING_SLOT_PSIZ);
const uint32_t layer_regid = gs ?
ir3_find_output_regid(gs, VARYING_SLOT_LAYER) : regid(63, 0);
uint32_t primitive_regid = gs ?
ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID) : regid(63, 0);
uint32_t flags_regid = gs ?
ir3_find_output_regid(gs, VARYING_SLOT_GS_VERTEX_FLAGS_IR3) : 0;
uint32_t pointsize_loc = 0xff, position_loc = 0xff, layer_loc = 0xff;
if (layer_regid != regid(63, 0)) {
@@ -797,22 +851,39 @@ tu6_emit_vpc(struct tu_cs *cs,
A6XX_SP_VS_VPC_DST_REG_OUTLOC0(linkage.var[i].loc);
}
if (gs)
tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_OUT_REG(0), sp_out_count);
else if (hs)
tu_cs_emit_pkt4(cs, REG_A6XX_SP_DS_OUT_REG(0), sp_out_count);
else
tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_OUT_REG(0), sp_out_count);
tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_out_reg, sp_out_count);
tu_cs_emit_array(cs, sp_out, sp_out_count);
if (gs)
tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_VPC_DST_REG(0), sp_vpc_dst_count);
else if (hs)
tu_cs_emit_pkt4(cs, REG_A6XX_SP_DS_VPC_DST_REG(0), sp_vpc_dst_count);
else
tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_VPC_DST_REG(0), sp_vpc_dst_count);
tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_vpc_dst_reg, sp_vpc_dst_count);
tu_cs_emit_array(cs, sp_vpc_dst, sp_vpc_dst_count);
tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_pack, 1);
tu_cs_emit(cs, A6XX_VPC_VS_PACK_POSITIONLOC(position_loc) |
A6XX_VPC_VS_PACK_PSIZELOC(pointsize_loc) |
A6XX_VPC_VS_PACK_STRIDE_IN_VPC(linkage.max_loc));
tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_clip_cntl, 1);
tu_cs_emit(cs, 0xffff00);
tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_cl_cntl, 1);
tu_cs_emit(cs, 0);
tu_cs_emit_pkt4(cs, cfg->reg_pc_xs_out_cntl, 1);
tu_cs_emit(cs, A6XX_PC_VS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) |
CONDREG(pointsize_regid, A6XX_PC_VS_OUT_CNTL_PSIZE) |
CONDREG(layer_regid, A6XX_PC_VS_OUT_CNTL_LAYER) |
CONDREG(primitive_regid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID));
tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_primitive_cntl, 1);
tu_cs_emit(cs, A6XX_SP_VS_PRIMITIVE_CNTL_OUT(linkage.cnt) |
A6XX_SP_GS_PRIMITIVE_CNTL_FLAGS_REGID(flags_regid));
tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_layer_cntl, 1);
tu_cs_emit(cs, A6XX_VPC_GS_LAYER_CNTL_LAYERLOC(layer_loc) | 0xff00);
tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_layer_cntl, 1);
tu_cs_emit(cs, CONDREG(layer_regid, A6XX_GRAS_GS_LAYER_CNTL_WRITES_LAYER));
tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMID_CNTL, 1);
tu_cs_emit(cs, COND(primid_passthru, A6XX_PC_PRIMID_CNTL_PRIMID_PASSTHRU));
@@ -822,11 +893,6 @@ tu6_emit_vpc(struct tu_cs *cs,
A6XX_VPC_CNTL_0_PRIMIDLOC(linkage.primid_loc) |
A6XX_VPC_CNTL_0_UNKLOC(0xff));
tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VS_PACK, 1);
tu_cs_emit(cs, A6XX_VPC_VS_PACK_POSITIONLOC(position_loc) |
A6XX_VPC_VS_PACK_PSIZELOC(pointsize_loc) |
A6XX_VPC_VS_PACK_STRIDE_IN_VPC(linkage.max_loc));
if (hs) {
shader_info *hs_info = &hs->shader->nir->info;
tu_cs_emit_pkt4(cs, REG_A6XX_PC_TESS_NUM_VERTEX, 1);
@@ -874,36 +940,6 @@ tu6_emit_vpc(struct tu_cs *cs,
tu_cs_emit(cs, A6XX_PC_TESS_CNTL_SPACING(spacing) |
A6XX_PC_TESS_CNTL_OUTPUT(output));
/* xxx: Misc tess unknowns: */
tu_cs_emit_pkt4(cs, REG_A6XX_VPC_DS_CLIP_CNTL, 1);
tu_cs_emit(cs, 0x00ffff00);
tu_cs_emit_pkt4(cs, REG_A6XX_VPC_DS_LAYER_CNTL, 1);
tu_cs_emit(cs, 0x0000ffff);
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_DS_LAYER_CNTL, 1);
tu_cs_emit(cs, 0x0);
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_DS_CL_CNTL, 1);
tu_cs_emit(cs, 0x0);
tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VS_PACK, 1);
tu_cs_emit(cs, A6XX_VPC_VS_PACK_POSITIONLOC(position_loc) |
A6XX_VPC_VS_PACK_PSIZELOC(255) |
A6XX_VPC_VS_PACK_STRIDE_IN_VPC(linkage.max_loc));
tu_cs_emit_pkt4(cs, REG_A6XX_VPC_DS_PACK, 1);
tu_cs_emit(cs, A6XX_VPC_DS_PACK_POSITIONLOC(position_loc) |
A6XX_VPC_DS_PACK_PSIZELOC(pointsize_loc) |
A6XX_VPC_DS_PACK_STRIDE_IN_VPC(linkage.max_loc));
tu_cs_emit_pkt4(cs, REG_A6XX_SP_DS_PRIMITIVE_CNTL, 1);
tu_cs_emit(cs, A6XX_SP_DS_PRIMITIVE_CNTL_OUT(linkage.cnt));
tu_cs_emit_pkt4(cs, REG_A6XX_PC_DS_OUT_CNTL, 1);
tu_cs_emit(cs, A6XX_PC_DS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) |
CONDREG(pointsize_regid, 0x100));
tu6_emit_link_map(cs, vs, hs, SB6_HS_SHADER);
tu6_emit_link_map(cs, hs, ds, SB6_DS_SHADER);
}
@@ -931,33 +967,6 @@ tu6_emit_vpc(struct tu_cs *cs,
vec4_size = 0;
}
uint32_t primitive_regid =
ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID);
tu_cs_emit_pkt4(cs, REG_A6XX_VPC_GS_PACK, 1);
tu_cs_emit(cs, A6XX_VPC_GS_PACK_POSITIONLOC(position_loc) |
A6XX_VPC_GS_PACK_PSIZELOC(pointsize_loc) |
A6XX_VPC_GS_PACK_STRIDE_IN_VPC(linkage.max_loc));
tu_cs_emit_pkt4(cs, REG_A6XX_VPC_GS_LAYER_CNTL, 1);
tu_cs_emit(cs, A6XX_VPC_GS_LAYER_CNTL_LAYERLOC(layer_loc) | 0xff00);
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_GS_LAYER_CNTL, 1);
tu_cs_emit(cs, CONDREG(layer_regid,
A6XX_GRAS_GS_LAYER_CNTL_WRITES_LAYER));
uint32_t flags_regid = ir3_find_output_regid(gs,
VARYING_SLOT_GS_VERTEX_FLAGS_IR3);
tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_PRIMITIVE_CNTL, 1);
tu_cs_emit(cs, A6XX_SP_GS_PRIMITIVE_CNTL_OUT(linkage.cnt) |
A6XX_SP_GS_PRIMITIVE_CNTL_FLAGS_REGID(flags_regid));
tu_cs_emit_pkt4(cs, REG_A6XX_PC_GS_OUT_CNTL, 1);
tu_cs_emit(cs, A6XX_PC_GS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) |
CONDREG(pointsize_regid, A6XX_PC_GS_OUT_CNTL_PSIZE) |
CONDREG(layer_regid, A6XX_PC_GS_OUT_CNTL_LAYER) |
CONDREG(primitive_regid, A6XX_PC_GS_OUT_CNTL_PRIMITIVE_ID));
tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_5, 1);
tu_cs_emit(cs,
A6XX_PC_PRIMITIVE_CNTL_5_GS_VERTICES_OUT(vertices_out) |
@@ -967,15 +976,9 @@ tu6_emit_vpc(struct tu_cs *cs,
tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_3, 1);
tu_cs_emit(cs, 0);
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_GS_CL_CNTL, 1);
tu_cs_emit(cs, 0);
tu_cs_emit_pkt4(cs, REG_A6XX_VPC_UNKNOWN_9100, 1);
tu_cs_emit(cs, 0xff);
tu_cs_emit_pkt4(cs, REG_A6XX_VPC_GS_CLIP_CNTL, 1);
tu_cs_emit(cs, 0xffff00);
tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_6, 1);
tu_cs_emit(cs, A6XX_PC_PRIMITIVE_CNTL_6_STRIDE_IN_VPC(vec4_size));
@@ -985,13 +988,6 @@ tu6_emit_vpc(struct tu_cs *cs,
tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_PRIM_SIZE, 1);
tu_cs_emit(cs, vs->output_size);
}
tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_PRIMITIVE_CNTL, 1);
tu_cs_emit(cs, A6XX_SP_VS_PRIMITIVE_CNTL_OUT(linkage.cnt));
tu_cs_emit_pkt4(cs, REG_A6XX_PC_VS_OUT_CNTL, 1);
tu_cs_emit(cs, A6XX_PC_VS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) |
(last_shader->writes_psize ? A6XX_PC_VS_OUT_CNTL_PSIZE : 0));
}
static int
@@ -2222,7 +2218,7 @@ tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder,
enum a6xx_polygon_mode mode = tu6_polygon_mode(rast_info->polygonMode);
struct tu_cs cs;
tu_cs_begin_sub_stream(&pipeline->cs, 11, &cs);
tu_cs_begin_sub_stream(&pipeline->cs, 9, &cs);
tu_cs_emit_regs(&cs,
A6XX_GRAS_CL_CNTL(
@@ -2239,7 +2235,6 @@ tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder,
A6XX_PC_POLYGON_MODE(.mode = mode));
/* move to hw ctx init? */
tu_cs_emit_regs(&cs, A6XX_GRAS_VS_CL_CNTL());
tu_cs_emit_regs(&cs,
A6XX_GRAS_SU_POINT_MINMAX(.min = 1.0f / 16.0f, .max = 4092.0f),
A6XX_GRAS_SU_POINT_SIZE(1.0f));