diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c index 4c18a66cc8a..f447b2e3e72 100644 --- a/src/freedreno/vulkan/tu_clear_blit.c +++ b/src/freedreno/vulkan/tu_clear_blit.c @@ -524,8 +524,7 @@ r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_ .persp_division_disable = 1, .vp_xform_disable = 1, .vp_clip_code_ignore = 1, - .clip_disable = 1), - A6XX_GRAS_VS_CL_CNTL(0)); + .clip_disable = 1)); tu_cs_emit_regs(cs, A6XX_GRAS_SU_CNTL()); // XXX msaa enable? tu_cs_emit_regs(cs, diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index de1a672b094..765732ad9a9 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -746,7 +746,6 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_A982, 0); tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_A9A8, 0); tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AB00, 0x5); - tu_cs_emit_write_reg(cs, REG_A6XX_VPC_VS_LAYER_CNTL, 0x0000ffff); /* TODO: set A6XX_VFD_ADD_OFFSET_INSTANCE and fix ir3 to avoid adding base instance */ tu_cs_emit_write_reg(cs, REG_A6XX_VFD_ADD_OFFSET, A6XX_VFD_ADD_OFFSET_VERTEX); @@ -769,7 +768,6 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881E, 0); tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_88F0, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_VPC_VS_CLIP_CNTL, 0xffff00); tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9107, 0); tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9236, @@ -791,7 +789,6 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B183, 0); tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8099, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_VS_LAYER_CNTL, 0); tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A0, 2); tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80AF, 0); tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9210, 0); diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index 74ab7872e9d..08eda112260 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -732,6 +732,53 @@ tu6_emit_vpc(struct tu_cs *cs, const struct ir3_shader_variant *gs, const struct ir3_shader_variant *fs) { + /* note: doesn't compile as static because of the array regs.. */ + const struct reg_config { + uint16_t reg_sp_xs_out_reg; + uint16_t reg_sp_xs_vpc_dst_reg; + uint16_t reg_vpc_xs_pack; + uint16_t reg_vpc_xs_clip_cntl; + uint16_t reg_gras_xs_cl_cntl; + uint16_t reg_pc_xs_out_cntl; + uint16_t reg_sp_xs_primitive_cntl; + uint16_t reg_vpc_xs_layer_cntl; + uint16_t reg_gras_xs_layer_cntl; + } reg_config[] = { + [MESA_SHADER_VERTEX] = { + REG_A6XX_SP_VS_OUT_REG(0), + REG_A6XX_SP_VS_VPC_DST_REG(0), + REG_A6XX_VPC_VS_PACK, + REG_A6XX_VPC_VS_CLIP_CNTL, + REG_A6XX_GRAS_VS_CL_CNTL, + REG_A6XX_PC_VS_OUT_CNTL, + REG_A6XX_SP_VS_PRIMITIVE_CNTL, + REG_A6XX_VPC_VS_LAYER_CNTL, + REG_A6XX_GRAS_VS_LAYER_CNTL + }, + [MESA_SHADER_TESS_EVAL] = { + REG_A6XX_SP_DS_OUT_REG(0), + REG_A6XX_SP_DS_VPC_DST_REG(0), + REG_A6XX_VPC_DS_PACK, + REG_A6XX_VPC_DS_CLIP_CNTL, + REG_A6XX_GRAS_DS_CL_CNTL, + REG_A6XX_PC_DS_OUT_CNTL, + REG_A6XX_SP_DS_PRIMITIVE_CNTL, + REG_A6XX_VPC_DS_LAYER_CNTL, + REG_A6XX_GRAS_DS_LAYER_CNTL + }, + [MESA_SHADER_GEOMETRY] = { + REG_A6XX_SP_GS_OUT_REG(0), + REG_A6XX_SP_GS_VPC_DST_REG(0), + REG_A6XX_VPC_GS_PACK, + REG_A6XX_VPC_GS_CLIP_CNTL, + REG_A6XX_GRAS_GS_CL_CNTL, + REG_A6XX_PC_GS_OUT_CNTL, + REG_A6XX_SP_GS_PRIMITIVE_CNTL, + REG_A6XX_VPC_GS_LAYER_CNTL, + REG_A6XX_GRAS_GS_LAYER_CNTL + }, + }; + const struct ir3_shader_variant *last_shader; if (gs) { last_shader = gs; @@ -740,6 +787,9 @@ tu6_emit_vpc(struct tu_cs *cs, } else { last_shader = vs; } + + const struct reg_config *cfg = ®_config[last_shader->type]; + struct ir3_shader_linkage linkage = { .primid_loc = 0xff }; if (fs) ir3_link_shaders(&linkage, last_shader, fs, true); @@ -766,6 +816,10 @@ tu6_emit_vpc(struct tu_cs *cs, ir3_find_output_regid(last_shader, VARYING_SLOT_PSIZ); const uint32_t layer_regid = gs ? ir3_find_output_regid(gs, VARYING_SLOT_LAYER) : regid(63, 0); + uint32_t primitive_regid = gs ? + ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID) : regid(63, 0); + uint32_t flags_regid = gs ? + ir3_find_output_regid(gs, VARYING_SLOT_GS_VERTEX_FLAGS_IR3) : 0; uint32_t pointsize_loc = 0xff, position_loc = 0xff, layer_loc = 0xff; if (layer_regid != regid(63, 0)) { @@ -797,22 +851,39 @@ tu6_emit_vpc(struct tu_cs *cs, A6XX_SP_VS_VPC_DST_REG_OUTLOC0(linkage.var[i].loc); } - if (gs) - tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_OUT_REG(0), sp_out_count); - else if (hs) - tu_cs_emit_pkt4(cs, REG_A6XX_SP_DS_OUT_REG(0), sp_out_count); - else - tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_OUT_REG(0), sp_out_count); + tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_out_reg, sp_out_count); tu_cs_emit_array(cs, sp_out, sp_out_count); - if (gs) - tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_VPC_DST_REG(0), sp_vpc_dst_count); - else if (hs) - tu_cs_emit_pkt4(cs, REG_A6XX_SP_DS_VPC_DST_REG(0), sp_vpc_dst_count); - else - tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_VPC_DST_REG(0), sp_vpc_dst_count); + tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_vpc_dst_reg, sp_vpc_dst_count); tu_cs_emit_array(cs, sp_vpc_dst, sp_vpc_dst_count); + tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_pack, 1); + tu_cs_emit(cs, A6XX_VPC_VS_PACK_POSITIONLOC(position_loc) | + A6XX_VPC_VS_PACK_PSIZELOC(pointsize_loc) | + A6XX_VPC_VS_PACK_STRIDE_IN_VPC(linkage.max_loc)); + + tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_clip_cntl, 1); + tu_cs_emit(cs, 0xffff00); + + tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_cl_cntl, 1); + tu_cs_emit(cs, 0); + + tu_cs_emit_pkt4(cs, cfg->reg_pc_xs_out_cntl, 1); + tu_cs_emit(cs, A6XX_PC_VS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) | + CONDREG(pointsize_regid, A6XX_PC_VS_OUT_CNTL_PSIZE) | + CONDREG(layer_regid, A6XX_PC_VS_OUT_CNTL_LAYER) | + CONDREG(primitive_regid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID)); + + tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_primitive_cntl, 1); + tu_cs_emit(cs, A6XX_SP_VS_PRIMITIVE_CNTL_OUT(linkage.cnt) | + A6XX_SP_GS_PRIMITIVE_CNTL_FLAGS_REGID(flags_regid)); + + tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_layer_cntl, 1); + tu_cs_emit(cs, A6XX_VPC_GS_LAYER_CNTL_LAYERLOC(layer_loc) | 0xff00); + + tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_layer_cntl, 1); + tu_cs_emit(cs, CONDREG(layer_regid, A6XX_GRAS_GS_LAYER_CNTL_WRITES_LAYER)); + tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMID_CNTL, 1); tu_cs_emit(cs, COND(primid_passthru, A6XX_PC_PRIMID_CNTL_PRIMID_PASSTHRU)); @@ -822,11 +893,6 @@ tu6_emit_vpc(struct tu_cs *cs, A6XX_VPC_CNTL_0_PRIMIDLOC(linkage.primid_loc) | A6XX_VPC_CNTL_0_UNKLOC(0xff)); - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VS_PACK, 1); - tu_cs_emit(cs, A6XX_VPC_VS_PACK_POSITIONLOC(position_loc) | - A6XX_VPC_VS_PACK_PSIZELOC(pointsize_loc) | - A6XX_VPC_VS_PACK_STRIDE_IN_VPC(linkage.max_loc)); - if (hs) { shader_info *hs_info = &hs->shader->nir->info; tu_cs_emit_pkt4(cs, REG_A6XX_PC_TESS_NUM_VERTEX, 1); @@ -874,36 +940,6 @@ tu6_emit_vpc(struct tu_cs *cs, tu_cs_emit(cs, A6XX_PC_TESS_CNTL_SPACING(spacing) | A6XX_PC_TESS_CNTL_OUTPUT(output)); - /* xxx: Misc tess unknowns: */ - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_DS_CLIP_CNTL, 1); - tu_cs_emit(cs, 0x00ffff00); - - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_DS_LAYER_CNTL, 1); - tu_cs_emit(cs, 0x0000ffff); - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_DS_LAYER_CNTL, 1); - tu_cs_emit(cs, 0x0); - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_DS_CL_CNTL, 1); - tu_cs_emit(cs, 0x0); - - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VS_PACK, 1); - tu_cs_emit(cs, A6XX_VPC_VS_PACK_POSITIONLOC(position_loc) | - A6XX_VPC_VS_PACK_PSIZELOC(255) | - A6XX_VPC_VS_PACK_STRIDE_IN_VPC(linkage.max_loc)); - - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_DS_PACK, 1); - tu_cs_emit(cs, A6XX_VPC_DS_PACK_POSITIONLOC(position_loc) | - A6XX_VPC_DS_PACK_PSIZELOC(pointsize_loc) | - A6XX_VPC_DS_PACK_STRIDE_IN_VPC(linkage.max_loc)); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_DS_PRIMITIVE_CNTL, 1); - tu_cs_emit(cs, A6XX_SP_DS_PRIMITIVE_CNTL_OUT(linkage.cnt)); - - tu_cs_emit_pkt4(cs, REG_A6XX_PC_DS_OUT_CNTL, 1); - tu_cs_emit(cs, A6XX_PC_DS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) | - CONDREG(pointsize_regid, 0x100)); - tu6_emit_link_map(cs, vs, hs, SB6_HS_SHADER); tu6_emit_link_map(cs, hs, ds, SB6_DS_SHADER); } @@ -931,33 +967,6 @@ tu6_emit_vpc(struct tu_cs *cs, vec4_size = 0; } - uint32_t primitive_regid = - ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID); - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_GS_PACK, 1); - tu_cs_emit(cs, A6XX_VPC_GS_PACK_POSITIONLOC(position_loc) | - A6XX_VPC_GS_PACK_PSIZELOC(pointsize_loc) | - A6XX_VPC_GS_PACK_STRIDE_IN_VPC(linkage.max_loc)); - - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_GS_LAYER_CNTL, 1); - tu_cs_emit(cs, A6XX_VPC_GS_LAYER_CNTL_LAYERLOC(layer_loc) | 0xff00); - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_GS_LAYER_CNTL, 1); - tu_cs_emit(cs, CONDREG(layer_regid, - A6XX_GRAS_GS_LAYER_CNTL_WRITES_LAYER)); - - uint32_t flags_regid = ir3_find_output_regid(gs, - VARYING_SLOT_GS_VERTEX_FLAGS_IR3); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_PRIMITIVE_CNTL, 1); - tu_cs_emit(cs, A6XX_SP_GS_PRIMITIVE_CNTL_OUT(linkage.cnt) | - A6XX_SP_GS_PRIMITIVE_CNTL_FLAGS_REGID(flags_regid)); - - tu_cs_emit_pkt4(cs, REG_A6XX_PC_GS_OUT_CNTL, 1); - tu_cs_emit(cs, A6XX_PC_GS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) | - CONDREG(pointsize_regid, A6XX_PC_GS_OUT_CNTL_PSIZE) | - CONDREG(layer_regid, A6XX_PC_GS_OUT_CNTL_LAYER) | - CONDREG(primitive_regid, A6XX_PC_GS_OUT_CNTL_PRIMITIVE_ID)); - tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_5, 1); tu_cs_emit(cs, A6XX_PC_PRIMITIVE_CNTL_5_GS_VERTICES_OUT(vertices_out) | @@ -967,15 +976,9 @@ tu6_emit_vpc(struct tu_cs *cs, tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_3, 1); tu_cs_emit(cs, 0); - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_GS_CL_CNTL, 1); - tu_cs_emit(cs, 0); - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_UNKNOWN_9100, 1); tu_cs_emit(cs, 0xff); - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_GS_CLIP_CNTL, 1); - tu_cs_emit(cs, 0xffff00); - tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_6, 1); tu_cs_emit(cs, A6XX_PC_PRIMITIVE_CNTL_6_STRIDE_IN_VPC(vec4_size)); @@ -985,13 +988,6 @@ tu6_emit_vpc(struct tu_cs *cs, tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_PRIM_SIZE, 1); tu_cs_emit(cs, vs->output_size); } - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_PRIMITIVE_CNTL, 1); - tu_cs_emit(cs, A6XX_SP_VS_PRIMITIVE_CNTL_OUT(linkage.cnt)); - - tu_cs_emit_pkt4(cs, REG_A6XX_PC_VS_OUT_CNTL, 1); - tu_cs_emit(cs, A6XX_PC_VS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) | - (last_shader->writes_psize ? A6XX_PC_VS_OUT_CNTL_PSIZE : 0)); } static int @@ -2222,7 +2218,7 @@ tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder, enum a6xx_polygon_mode mode = tu6_polygon_mode(rast_info->polygonMode); struct tu_cs cs; - tu_cs_begin_sub_stream(&pipeline->cs, 11, &cs); + tu_cs_begin_sub_stream(&pipeline->cs, 9, &cs); tu_cs_emit_regs(&cs, A6XX_GRAS_CL_CNTL( @@ -2239,7 +2235,6 @@ tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder, A6XX_PC_POLYGON_MODE(.mode = mode)); /* move to hw ctx init? */ - tu_cs_emit_regs(&cs, A6XX_GRAS_VS_CL_CNTL()); tu_cs_emit_regs(&cs, A6XX_GRAS_SU_POINT_MINMAX(.min = 1.0f / 16.0f, .max = 4092.0f), A6XX_GRAS_SU_POINT_SIZE(1.0f));