glsl, nir: Make ir_quadop_bitfield_insert a vectorized operation.
We would like to be able to combine

    result.x = bitfieldInsert(src0.x, src1.x, src2.x, src3.x);
    result.y = bitfieldInsert(src0.y, src1.y, src2.y, src3.y);
    result.z = bitfieldInsert(src0.z, src1.z, src2.z, src3.z);
    result.w = bitfieldInsert(src0.w, src1.w, src2.w, src3.w);

into a single ivec4 bitfieldInsert operation. This should be possible with most drivers.

This patch changes the offset and bits parameters from scalar ints to ivecN or uvecN. The type of all four operands will be the same, for simplicity.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
This commit is contained in:
committed by
Matt Turner
parent
b85a229e1f
commit
b4e198f47f
@@ -4902,13 +4902,19 @@ builtin_builder::_bitfieldExtract(const glsl_type *type)
|
||||
ir_function_signature *
|
||||
builtin_builder::_bitfieldInsert(const glsl_type *type)
|
||||
{
|
||||
bool is_uint = type->base_type == GLSL_TYPE_UINT;
|
||||
ir_variable *base = in_var(type, "base");
|
||||
ir_variable *insert = in_var(type, "insert");
|
||||
ir_variable *offset = in_var(glsl_type::int_type, "offset");
|
||||
ir_variable *bits = in_var(glsl_type::int_type, "bits");
|
||||
MAKE_SIG(type, gpu_shader5_or_es31, 4, base, insert, offset, bits);
|
||||
|
||||
body.emit(ret(bitfield_insert(base, insert, offset, bits)));
|
||||
operand cast_offset = is_uint ? i2u(offset) : operand(offset);
|
||||
operand cast_bits = is_uint ? i2u(bits) : operand(bits);
|
||||
|
||||
body.emit(ret(bitfield_insert(base, insert,
|
||||
swizzle(cast_offset, SWIZZLE_XXXX, type->vector_elements),
|
||||
swizzle(cast_bits, SWIZZLE_XXXX, type->vector_elements))));
|
||||
|
||||
return sig;
|
||||
}
|
||||
|
||||
@@ -1710,7 +1710,6 @@ public:
|
||||
operation == ir_triop_vector_insert ||
|
||||
operation == ir_quadop_vector ||
|
||||
/* TODO: these can't currently be vectorized */
|
||||
operation == ir_quadop_bitfield_insert ||
|
||||
operation == ir_triop_bitfield_extract;
|
||||
}
|
||||
|
||||
|
||||
@@ -1710,10 +1710,10 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
|
||||
}
|
||||
|
||||
case ir_quadop_bitfield_insert: {
|
||||
int offset = op[2]->value.i[0];
|
||||
int bits = op[3]->value.i[0];
|
||||
|
||||
for (unsigned c = 0; c < components; c++) {
|
||||
int offset = op[2]->value.i[c];
|
||||
int bits = op[3]->value.i[c];
|
||||
|
||||
if (bits == 0)
|
||||
data.u[c] = op[0]->value.u[c];
|
||||
else if (offset < 0 || bits < 0)
|
||||
|
||||
@@ -647,10 +647,11 @@ ir_validate::visit_leave(ir_expression *ir)
|
||||
break;
|
||||
|
||||
case ir_quadop_bitfield_insert:
|
||||
assert(ir->type->is_integer());
|
||||
assert(ir->operands[0]->type == ir->type);
|
||||
assert(ir->operands[1]->type == ir->type);
|
||||
assert(ir->operands[2]->type == glsl_type::int_type);
|
||||
assert(ir->operands[3]->type == glsl_type::int_type);
|
||||
assert(ir->operands[2]->type == ir->type);
|
||||
assert(ir->operands[3]->type == ir->type);
|
||||
break;
|
||||
|
||||
case ir_quadop_vector:
|
||||
|
||||
@@ -381,8 +381,8 @@ lower_instructions_visitor::ldexp_to_arith(ir_expression *ir)
|
||||
|
||||
ir_constant *sign_mask = new(ir) ir_constant(0x80000000u, vec_elem);
|
||||
|
||||
ir_constant *exp_shift = new(ir) ir_constant(23);
|
||||
ir_constant *exp_width = new(ir) ir_constant(8);
|
||||
ir_constant *exp_shift = new(ir) ir_constant(23, vec_elem);
|
||||
ir_constant *exp_width = new(ir) ir_constant(8, vec_elem);
|
||||
|
||||
/* Temporary variables */
|
||||
ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary);
|
||||
@@ -470,8 +470,8 @@ lower_instructions_visitor::dldexp_to_arith(ir_expression *ir)
|
||||
|
||||
ir_constant *sign_mask = new(ir) ir_constant(0x80000000u);
|
||||
|
||||
ir_constant *exp_shift = new(ir) ir_constant(20);
|
||||
ir_constant *exp_width = new(ir) ir_constant(11);
|
||||
ir_constant *exp_shift = new(ir) ir_constant(20, vec_elem);
|
||||
ir_constant *exp_width = new(ir) ir_constant(11, vec_elem);
|
||||
ir_constant *exp_bias = new(ir) ir_constant(1022, vec_elem);
|
||||
|
||||
/* Temporary variables */
|
||||
|
||||
@@ -230,8 +230,8 @@ private:
|
||||
if (op_mask & LOWER_PACK_USE_BFI) {
|
||||
return bitfield_insert(bit_and(swizzle_x(u), constant(0xffffu)),
|
||||
swizzle_y(u),
|
||||
constant(16),
|
||||
constant(16));
|
||||
constant(16u),
|
||||
constant(16u));
|
||||
}
|
||||
|
||||
/* return (u.y << 16) | (u.x & 0xffff); */
|
||||
@@ -261,9 +261,9 @@ private:
|
||||
return bitfield_insert(bitfield_insert(
|
||||
bitfield_insert(
|
||||
bit_and(swizzle_x(u), constant(0xffu)),
|
||||
swizzle_y(u), constant(8), constant(8)),
|
||||
swizzle_z(u), constant(16), constant(8)),
|
||||
swizzle_w(u), constant(24), constant(8));
|
||||
swizzle_y(u), constant(8u), constant(8u)),
|
||||
swizzle_z(u), constant(16u), constant(8u)),
|
||||
swizzle_w(u), constant(24u), constant(8u));
|
||||
}
|
||||
|
||||
/* uvec4 u = UVEC4_RVAL & 0xff */
|
||||
|
||||
@@ -609,10 +609,10 @@ def quadop_horiz(name, output_size, src1_size, src2_size, src3_size,
|
||||
[tuint, tuint, tuint, tuint],
|
||||
"", const_expr)
|
||||
|
||||
opcode("bitfield_insert", 0, tuint, [0, 0, 1, 1],
|
||||
opcode("bitfield_insert", 0, tuint, [0, 0, 0, 0],
|
||||
[tuint, tuint, tint, tint], "", """
|
||||
unsigned base = src0, insert = src1;
|
||||
int offset = src2.x, bits = src3.x;
|
||||
int offset = src2, bits = src3;
|
||||
if (bits == 0) {
|
||||
dst = 0;
|
||||
} else if (offset < 0 || bits < 0 || bits + offset > 32) {
|
||||
|
||||
Reference in New Issue
Block a user