brw: enable vertex fetching component packing
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32418>
This commit is contained in:
committed by
Marge Bot
parent
9b8d75c95c
commit
4f892ae4f7
@@ -28,6 +28,159 @@ brw_assign_vs_urb_setup(brw_shader &s)
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned
|
||||
brw_nir_pack_vs_input(nir_shader *nir, struct brw_vs_prog_data *prog_data)
|
||||
{
|
||||
struct vf_attribute {
|
||||
unsigned reg_offset;
|
||||
uint8_t component_mask;
|
||||
bool is_64bit:1;
|
||||
bool is_used:1;
|
||||
} attributes[MAX_HW_VERT_ATTRIB] = {};
|
||||
|
||||
/* IO lowering is going to break dmat inputs into a location each, so we
|
||||
* need to reproduce the 64bit nature of the variable into each slot.
|
||||
*/
|
||||
nir_foreach_shader_in_variable(var, nir) {
|
||||
const bool is_64bit = glsl_type_is_64bit(var->type);
|
||||
const uint32_t slots = glsl_count_vec4_slots(var->type, true, false);
|
||||
for (uint32_t i = 0; i < slots; i++)
|
||||
attributes[var->data.location + i].is_64bit = is_64bit;
|
||||
}
|
||||
|
||||
/* First mark all used inputs */
|
||||
nir_foreach_function_impl(impl, nir) {
|
||||
nir_foreach_block(block, impl) {
|
||||
nir_foreach_instr(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
if (intrin->intrinsic != nir_intrinsic_load_input)
|
||||
continue;
|
||||
|
||||
assert(intrin->def.bit_size == 32);
|
||||
|
||||
const struct nir_io_semantics io =
|
||||
nir_intrinsic_io_semantics(intrin);
|
||||
|
||||
attributes[io.location].is_used = true;
|
||||
|
||||
/* SKL PRMs, Vol 2a: Command Reference: Instructions,
|
||||
* 3DSTATE_VF_COMPONENT_PACKING:
|
||||
*
|
||||
* "Software shall enable all components (XYZW) for any and all
|
||||
* VERTEX_ELEMENTs associated with a 256-bit SURFACE_FORMAT.
|
||||
* It is INVALID to disable any components in these cases."
|
||||
*
|
||||
* Enable this XYZW for any > 128-bit format.
|
||||
*/
|
||||
if (nir->info.dual_slot_inputs & BITFIELD64_BIT(io.location)) {
|
||||
attributes[io.location].component_mask |= 0xff;
|
||||
} else {
|
||||
const uint8_t mask =
|
||||
nir_component_mask(intrin->num_components) <<
|
||||
nir_intrinsic_component(intrin);
|
||||
|
||||
attributes[io.location].component_mask |= mask;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Compute the register offsets */
|
||||
unsigned reg_offset = 0;
|
||||
unsigned vertex_element = 0;
|
||||
for (unsigned a = 0; a < ARRAY_SIZE(attributes); a++) {
|
||||
if (!attributes[a].is_used)
|
||||
continue;
|
||||
|
||||
/* SKL PRMs, Vol 2a: Command Reference: Instructions,
|
||||
* 3DSTATE_VF_COMPONENT_PACKING:
|
||||
*
|
||||
* "No enable bits are provided for Vertex Elements [32-33],
|
||||
* and therefore no packing is performed on these elements (if
|
||||
* Valid, all 4 components are stored)."
|
||||
*/
|
||||
if (vertex_element >= 32)
|
||||
attributes[a].component_mask = 0xf;
|
||||
|
||||
attributes[a].reg_offset = reg_offset;
|
||||
|
||||
reg_offset += util_bitcount(attributes[a].component_mask);
|
||||
vertex_element++;
|
||||
}
|
||||
|
||||
/* Remap inputs */
|
||||
nir_foreach_function_impl(impl, nir) {
|
||||
nir_foreach_block(block, impl) {
|
||||
nir_foreach_instr(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
if (intrin->intrinsic != nir_intrinsic_load_input)
|
||||
continue;
|
||||
|
||||
struct nir_io_semantics io = nir_intrinsic_io_semantics(intrin);
|
||||
|
||||
unsigned slot = attributes[io.location].reg_offset / 4;
|
||||
unsigned slot_component =
|
||||
attributes[io.location].reg_offset % 4 +
|
||||
util_bitcount(attributes[io.location].component_mask &
|
||||
BITFIELD_MASK(io.high_dvec2 * 4 +
|
||||
nir_intrinsic_component(intrin)));
|
||||
|
||||
slot += slot_component / 4;
|
||||
slot_component %= 4;
|
||||
|
||||
nir_intrinsic_set_base(intrin, slot);
|
||||
nir_intrinsic_set_component(intrin, slot_component);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Generate the packing array */
|
||||
unsigned vf_offset = 0;
|
||||
for (unsigned a = 0; a < ARRAY_SIZE(attributes) && vf_offset < 32; a++) {
|
||||
if (!attributes[a].is_used)
|
||||
continue;
|
||||
|
||||
uint32_t mask;
|
||||
/* Stores masks in attributes[a].component_mask are in terms of 32-bit
|
||||
* components, but the HW depending on the format will interpret
|
||||
* prog_data->vf_component_packing[] bits as either a 32-bit or 64-bit
|
||||
* component. So we need to only consider every other bit.
|
||||
*/
|
||||
if (attributes[a].is_64bit) {
|
||||
mask = 0;
|
||||
u_foreach_bit(b, attributes[a].component_mask)
|
||||
mask |= BITFIELD_BIT(b / 2);
|
||||
} else {
|
||||
mask = attributes[a].component_mask;
|
||||
}
|
||||
/* We should only have 4bits enabled max */
|
||||
assert((mask & ~0xfu) == 0);
|
||||
prog_data->vf_component_packing[vf_offset / 8] |=
|
||||
mask << (4 * (vf_offset % 8));
|
||||
vf_offset++;
|
||||
}
|
||||
|
||||
/* SKL PRMs, Vol 2a: Command Reference: Instructions,
|
||||
* 3DSTATE_VF_COMPONENT_PACKING:
|
||||
*
|
||||
* "At least one component of one "valid" Vertex Element must be
|
||||
* enabled."
|
||||
*/
|
||||
if (prog_data->vf_component_packing[0] == 0 &&
|
||||
prog_data->vf_component_packing[1] == 0 &&
|
||||
prog_data->vf_component_packing[2] == 0 &&
|
||||
prog_data->vf_component_packing[3] == 0)
|
||||
prog_data->vf_component_packing[0] = 0x1;
|
||||
|
||||
return reg_offset;
|
||||
}
|
||||
|
||||
static bool
|
||||
run_vs(brw_shader &s)
|
||||
{
|
||||
@@ -83,6 +236,13 @@ brw_compile_vs(const struct brw_compiler *compiler,
|
||||
|
||||
brw_nir_lower_vs_inputs(nir);
|
||||
brw_nir_lower_vue_outputs(nir);
|
||||
|
||||
memset(prog_data->vf_component_packing, 0,
|
||||
sizeof(prog_data->vf_component_packing));
|
||||
unsigned nr_packed_regs = 0;
|
||||
if (key->vf_component_packing)
|
||||
nr_packed_regs = brw_nir_pack_vs_input(nir, prog_data);
|
||||
|
||||
brw_postprocess_nir(nir, compiler, debug_enabled,
|
||||
key->base.robust_flags);
|
||||
|
||||
@@ -127,8 +287,14 @@ brw_compile_vs(const struct brw_compiler *compiler,
|
||||
if (BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
|
||||
prog_data->uses_drawid = true;
|
||||
|
||||
prog_data->base.urb_read_length = DIV_ROUND_UP(nr_attribute_slots, 2);
|
||||
unsigned nr_attribute_regs = 4 * nr_attribute_slots;
|
||||
unsigned nr_attribute_regs;
|
||||
if (key->vf_component_packing) {
|
||||
prog_data->base.urb_read_length = DIV_ROUND_UP(nr_packed_regs, 8);
|
||||
nr_attribute_regs = nr_packed_regs;
|
||||
} else {
|
||||
prog_data->base.urb_read_length = DIV_ROUND_UP(nr_attribute_slots, 2);
|
||||
nr_attribute_regs = 4 * (nr_attribute_slots);
|
||||
}
|
||||
|
||||
/* Since vertex shaders reuse the same VUE entry for inputs and outputs
|
||||
* (overwriting the original contents), we need to make sure the size is
|
||||
|
||||
@@ -233,6 +233,16 @@ struct brw_base_prog_key {
|
||||
/** The program key for Vertex Shaders. */
|
||||
struct brw_vs_prog_key {
|
||||
/* Key fields shared with the other shader stages' program keys. */
struct brw_base_prog_key base;
|
||||
|
||||
/** Enable vertex fetch component packing.
 *
 * Using this option requires that the driver programs
 * 3DSTATE_VF_COMPONENT_PACKING with the values provided in
 * brw_vs_prog_data::vf_component_packing.
 */
|
||||
bool vf_component_packing : 1;
|
||||
|
||||
/* Explicit padding: with the 1-bit flag above this adds up to 32 bits.
 * NOTE(review): presumably kept so the key has no unnamed bits when it is
 * hashed or compared - confirm against the key handling code.
 */
uint32_t padding : 31;
|
||||
};
|
||||
|
||||
/** The program key for Tessellation Control Shaders. */
|
||||
@@ -1028,6 +1038,8 @@ struct brw_vs_prog_data {
|
||||
bool uses_firstvertex;
|
||||
bool uses_baseinstance;
|
||||
bool uses_drawid;
|
||||
|
||||
uint32_t vf_component_packing[4];
|
||||
};
|
||||
|
||||
struct brw_tcs_prog_data
|
||||
|
||||
Reference in New Issue
Block a user