i965/fs: add support for ir_*_interpolate_at_* expressions
SIMD8-only for now.
V5: - Fix style complaints
- Move prototype to be with other oddball emit functions
- Use unreachable() instead of assert() where possible
V6: - Describe what is happening with the clamping
- Add reg_width to make some expressions clearer
Signed-off-by: Chris Forbes <chrisf@ijw.co.nz>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
@@ -439,6 +439,8 @@ public:
|
||||
void emit_untyped_surface_read(unsigned surf_index, fs_reg dst,
|
||||
fs_reg offset);
|
||||
|
||||
/* Emit code for the ir_unop_interpolate_at_centroid,
 * ir_binop_interpolate_at_offset and ir_binop_interpolate_at_sample
 * expressions; visit(ir_expression *) dispatches those three here. */
void emit_interpolate_expression(ir_expression *ir);
|
||||
|
||||
bool try_rewrite_rhs_to_dst(ir_assignment *ir,
|
||||
fs_reg dst,
|
||||
fs_reg src,
|
||||
|
||||
@@ -344,6 +344,133 @@ fs_visitor::try_emit_mad(ir_expression *ir)
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Pack a floating-point pixel offset into a 4-bit two's-complement field
 * counting sixteenths of a pixel.
 *
 * The scaled value is truncated toward zero and clamped to [-8, +7] before
 * being masked to four bits:
 *
 *  - The upper clamp (+7/16) exists because +8/16 is not representable in
 *    four signed bits; without it +0.5 would encode as -8/16.
 *  - The lower clamp (-8/16) keeps anything below the representable range
 *    from wrapping around (e.g. -9/16 would otherwise encode as +7/16).
 *
 * Clamping is done in floating point so that very large magnitudes never
 * reach the float-to-int conversion.
 *
 * \param x  pixel offset
 * \return   the encoded offset in the low four bits (0..15)
 */
static int
pack_pixel_offset(float x)
{
   float scaled = x * 16;

   if (scaled > 7.0f)
      scaled = 7.0f;
   else if (scaled < -8.0f)
      scaled = -8.0f;

   return static_cast<int>(scaled) & 0xf;
}
|
||||
|
||||
void
|
||||
fs_visitor::emit_interpolate_expression(ir_expression *ir)
|
||||
{
|
||||
/* in SIMD16 mode, the pixel interpolator returns coords interleaved
|
||||
* 8 channels at a time, same as the barycentric coords presented in
|
||||
* the FS payload. this requires a bit of extra work to support.
|
||||
*/
|
||||
no16("interpolate_at_* not yet supported in SIMD16 mode.");
|
||||
|
||||
ir_dereference * deref = ir->operands[0]->as_dereference();
|
||||
ir_swizzle * swiz = NULL;
|
||||
if (!deref) {
|
||||
/* the api does not allow a swizzle here, but the varying packing code
|
||||
* may have pushed one into here.
|
||||
*/
|
||||
swiz = ir->operands[0]->as_swizzle();
|
||||
assert(swiz);
|
||||
deref = swiz->val->as_dereference();
|
||||
}
|
||||
assert(deref);
|
||||
ir_variable * var = deref->variable_referenced();
|
||||
assert(var);
|
||||
|
||||
/* 1. collect interpolation factors */
|
||||
|
||||
fs_reg dst_x = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 2, 1));
|
||||
fs_reg dst_y = dst_x;
|
||||
dst_y.reg_offset++;
|
||||
|
||||
/* for most messages, we need one reg of ignored data; the hardware requires mlen==1
|
||||
* even when there is no payload. in the per-slot offset case, we'll replace this with
|
||||
* the proper source data. */
|
||||
fs_reg src = fs_reg(this, glsl_type::float_type);
|
||||
int mlen = 1; /* one reg unless overriden */
|
||||
int reg_width = dispatch_width / 8;
|
||||
fs_inst *inst;
|
||||
|
||||
switch (ir->operation) {
|
||||
case ir_unop_interpolate_at_centroid:
|
||||
inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_x, src, fs_reg(0u));
|
||||
break;
|
||||
|
||||
case ir_binop_interpolate_at_sample: {
|
||||
ir_constant *sample_num = ir->operands[1]->as_constant();
|
||||
assert(sample_num || !"nonconstant sample number should have been lowered.");
|
||||
|
||||
unsigned msg_data = sample_num->value.i[0] << 4;
|
||||
inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_x, src, fs_reg(msg_data));
|
||||
break;
|
||||
}
|
||||
|
||||
case ir_binop_interpolate_at_offset: {
|
||||
ir_constant *const_offset = ir->operands[1]->as_constant();
|
||||
if (const_offset) {
|
||||
unsigned msg_data = pack_pixel_offset(const_offset->value.f[0]) |
|
||||
(pack_pixel_offset(const_offset->value.f[1]) << 4);
|
||||
inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_x, src,
|
||||
fs_reg(msg_data));
|
||||
} else {
|
||||
/* pack the operands: hw wants offsets as 4 bit signed ints */
|
||||
ir->operands[1]->accept(this);
|
||||
src = fs_reg(this, glsl_type::ivec2_type);
|
||||
fs_reg src2 = src;
|
||||
for (int i = 0; i < 2; i++) {
|
||||
fs_reg temp = fs_reg(this, glsl_type::float_type);
|
||||
emit(MUL(temp, this->result, fs_reg(16.0f)));
|
||||
emit(MOV(src2, temp)); /* float to int */
|
||||
|
||||
/* Clamp the upper end of the range to +7/16. ARB_gpu_shader5 requires
|
||||
* that we support a maximum offset of +0.5, which isn't representable
|
||||
* in a S0.4 value -- if we didn't clamp it, we'd end up with -8/16,
|
||||
* which is the opposite of what the shader author wanted.
|
||||
*
|
||||
* This is legal due to ARB_gpu_shader5's quantization rules:
|
||||
*
|
||||
* "Not all values of <offset> may be supported; x and y offsets may
|
||||
* be rounded to fixed-point values with the number of fraction bits
|
||||
* given by the implementation-dependent constant
|
||||
* FRAGMENT_INTERPOLATION_OFFSET_BITS"
|
||||
*/
|
||||
|
||||
fs_inst *inst = emit(BRW_OPCODE_SEL, src2, src2, fs_reg(7));
|
||||
inst->conditional_mod = BRW_CONDITIONAL_L; /* min(src2, 7) */
|
||||
|
||||
src2.reg_offset++;
|
||||
this->result.reg_offset++;
|
||||
}
|
||||
|
||||
mlen = 2 * reg_width;
|
||||
inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_x, src,
|
||||
fs_reg(0u));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("not reached");
|
||||
}
|
||||
|
||||
inst->mlen = mlen;
|
||||
inst->regs_written = 2 * reg_width; /* 2 floats per slot returned */
|
||||
inst->pi_noperspective = var->determine_interpolation_mode(key->flat_shade) ==
|
||||
INTERP_QUALIFIER_NOPERSPECTIVE;
|
||||
|
||||
/* 2. emit linterp */
|
||||
|
||||
fs_reg res(this, ir->type);
|
||||
this->result = res;
|
||||
|
||||
for (int i = 0; i < ir->type->vector_elements; i++) {
|
||||
int ch = swiz ? ((*(int *)&swiz->mask) >> 2*i) & 3 : i;
|
||||
emit(FS_OPCODE_LINTERP, res,
|
||||
dst_x, dst_y,
|
||||
fs_reg(interp_reg(var->data.location, ch)));
|
||||
res.reg_offset++;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::visit(ir_expression *ir)
|
||||
{
|
||||
@@ -355,9 +482,22 @@ fs_visitor::visit(ir_expression *ir)
|
||||
|
||||
if (try_emit_saturate(ir))
|
||||
return;
|
||||
if (ir->operation == ir_binop_add) {
|
||||
|
||||
/* Deal with the real oddball stuff first */
|
||||
switch (ir->operation) {
|
||||
case ir_binop_add:
|
||||
if (try_emit_mad(ir))
|
||||
return;
|
||||
return;
|
||||
break;
|
||||
|
||||
case ir_unop_interpolate_at_centroid:
|
||||
case ir_binop_interpolate_at_offset:
|
||||
case ir_binop_interpolate_at_sample:
|
||||
emit_interpolate_expression(ir);
|
||||
return;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
for (operand = 0; operand < ir->get_num_operands(); operand++) {
|
||||
@@ -815,6 +955,12 @@ fs_visitor::visit(ir_expression *ir)
|
||||
inst = emit(BRW_OPCODE_SEL, this->result, op[1], op[2]);
|
||||
inst->predicate = BRW_PREDICATE_NORMAL;
|
||||
break;
|
||||
|
||||
case ir_unop_interpolate_at_centroid:
|
||||
case ir_binop_interpolate_at_offset:
|
||||
case ir_binop_interpolate_at_sample:
|
||||
unreachable("already handled above");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user