i965/fs: Handle nir shared variable store intrinsic

v4:
 * Apply similar optimization for shared variable stores as
   0cb7d7b4b7. This was causing an
   OpenGLES 3.1 CTS failure, but
   867c436ca8 fixes that.

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
This commit is contained in:
Jordan Justen
2015-07-28 15:25:46 -07:00
parent 8613206bd3
commit faddb301ff
+48
View File
@@ -2455,6 +2455,54 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
case nir_intrinsic_store_shared_indirect:
   has_indirect = true;
   /* fallthrough */
case nir_intrinsic_store_shared: {
   /* This store path is only expected to be reached on gen7 and later. */
   assert(devinfo->gen >= 7);

   /* Surface (binding table) index handed to the untyped write. */
   fs_reg slm_surface = brw_imm_ud(GEN7_BTI_SLM);

   /* Register holding the components to be stored. */
   fs_reg data = get_nir_src(instr->src[0]);

   /* Walk the writemask (const_index[1]) one run of consecutive enabled
    * channels at a time, emitting a single write message per run.
    *
    * ffs() on the remaining mask yields the first enabled channel; ffs() on
    * the inverted mask, shifted down so that channel sits at bit 0, yields
    * how many enabled channels follow it without a gap.
    */
   for (unsigned pending = instr->const_index[1]; pending != 0; ) {
      unsigned run_start = ffs(pending) - 1;
      unsigned run_len = ffs(~(pending >> run_start)) - 1;

      /* Each channel advances the destination offset by 4. */
      unsigned byte_delta = 4 * run_start;

      fs_reg dst_offset;
      if (has_indirect) {
         /* Indirect form: the offset comes from src[1] at run time, so add
          * the per-run delta with an ADD into a fresh UD register.
          */
         dst_offset = vgrf(glsl_type::uint_type);
         bld.ADD(dst_offset,
                 retype(get_nir_src(instr->src[1]), BRW_REGISTER_TYPE_UD),
                 brw_imm_ud(byte_delta));
      } else {
         /* Direct form: the offset is const_index[0], folded together with
          * the per-run delta into one immediate.
          */
         dst_offset = brw_imm_ud(instr->const_index[0] + byte_delta);
      }

      emit_untyped_write(bld, slm_surface, dst_offset,
                         offset(data, bld, run_start),
                         1 /* dims */, run_len,
                         BRW_PREDICATE_NONE);

      /* Keep only the channels above the run just written; the loop ends
       * once none are left.
       */
      pending &= (15 << (run_start + run_len));
   }
   break;
}
case nir_intrinsic_load_input_indirect:
unreachable("Not allowed");
/* fallthrough */