i965/fs: Handle nir shared variable store intrinsic
v4: * Apply similar optimization for shared variable stores as 0cb7d7b4b7. This was causing an OpenGLES 3.1 CTS failure, but 867c436ca8 fixes that. Signed-off-by: Jordan Justen <jordan.l.justen@intel.com> Reviewed-by: Iago Toral Quiroga <itoral@igalia.com> Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
This commit is contained in:
@@ -2455,6 +2455,54 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_store_shared_indirect:
|
||||
has_indirect = true;
|
||||
/* fallthrough */
|
||||
case nir_intrinsic_store_shared: {
|
||||
assert(devinfo->gen >= 7);
|
||||
|
||||
/* Block index */
|
||||
fs_reg surf_index = brw_imm_ud(GEN7_BTI_SLM);
|
||||
|
||||
/* Value */
|
||||
fs_reg val_reg = get_nir_src(instr->src[0]);
|
||||
|
||||
/* Writemask */
|
||||
unsigned writemask = instr->const_index[1];
|
||||
|
||||
/* Combine groups of consecutive enabled channels in one write
|
||||
* message. We use ffs to find the first enabled channel and then ffs on
|
||||
* the bit-inverse, down-shifted writemask to determine the length of
|
||||
* the block of enabled bits.
|
||||
*/
|
||||
while (writemask) {
|
||||
unsigned first_component = ffs(writemask) - 1;
|
||||
unsigned length = ffs(~(writemask >> first_component)) - 1;
|
||||
fs_reg offset_reg;
|
||||
|
||||
if (!has_indirect) {
|
||||
offset_reg = brw_imm_ud(instr->const_index[0] + 4 * first_component);
|
||||
} else {
|
||||
offset_reg = vgrf(glsl_type::uint_type);
|
||||
bld.ADD(offset_reg,
|
||||
retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD),
|
||||
brw_imm_ud(4 * first_component));
|
||||
}
|
||||
|
||||
emit_untyped_write(bld, surf_index, offset_reg,
|
||||
offset(val_reg, bld, first_component),
|
||||
1 /* dims */, length,
|
||||
BRW_PREDICATE_NONE);
|
||||
|
||||
/* Clear the bits in the writemask that we just wrote, then try
|
||||
* again to see if more channels are left.
|
||||
*/
|
||||
writemask &= (15 << (first_component + length));
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_input_indirect:
|
||||
unreachable("Not allowed");
|
||||
/* fallthrough */
|
||||
|
||||
Reference in New Issue
Block a user