|
|
|
@@ -79,10 +79,10 @@ void FragmentShader::do_get_shader_info(r600_shader *sh_info)
|
|
|
|
|
bool FragmentShader::load_input(nir_intrinsic_instr *intr)
|
|
|
|
|
{
|
|
|
|
|
auto& vf = value_factory();
|
|
|
|
|
AluInstr *ir = nullptr;
|
|
|
|
|
|
|
|
|
|
auto location = nir_intrinsic_io_semantics(intr).location;
|
|
|
|
|
if (location == VARYING_SLOT_POS) {
|
|
|
|
|
AluInstr *ir = nullptr;
|
|
|
|
|
for (unsigned i = 0; i < nir_dest_num_components(intr->dest) ; ++i) {
|
|
|
|
|
ir = new AluInstr(op1_mov,
|
|
|
|
|
vf.dest(intr->dest, i, pin_none),
|
|
|
|
@@ -96,41 +96,17 @@ bool FragmentShader::load_input(nir_intrinsic_instr *intr)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (location == VARYING_SLOT_FACE) {
|
|
|
|
|
ir = new AluInstr(op2_setge_dx10,
|
|
|
|
|
vf.dest(intr->dest, 0, pin_none),
|
|
|
|
|
m_face_input,
|
|
|
|
|
vf.inline_const(ALU_SRC_0, 0),
|
|
|
|
|
AluInstr::last_write);
|
|
|
|
|
auto ir = new AluInstr(op2_setge_dx10,
|
|
|
|
|
vf.dest(intr->dest, 0, pin_none),
|
|
|
|
|
m_face_input,
|
|
|
|
|
vf.inline_const(ALU_SRC_0, 0),
|
|
|
|
|
AluInstr::last_write);
|
|
|
|
|
set_input_gpr(nir_intrinsic_base(intr), m_face_input->sel());
|
|
|
|
|
|
|
|
|
|
emit_instruction(ir);
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
auto io = input(nir_intrinsic_base(intr));
|
|
|
|
|
auto comp = nir_intrinsic_component(intr);
|
|
|
|
|
bool need_temp = comp > 0 || !intr->dest.is_ssa;
|
|
|
|
|
for (unsigned i = 0; i < nir_dest_num_components(intr->dest) ; ++i) {
|
|
|
|
|
if (need_temp) {
|
|
|
|
|
auto tmp = vf.temp_register(comp + i);
|
|
|
|
|
ir = new AluInstr(op1_interp_load_p0,
|
|
|
|
|
tmp,
|
|
|
|
|
new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i + comp),
|
|
|
|
|
AluInstr::last_write);
|
|
|
|
|
emit_instruction(ir);
|
|
|
|
|
emit_instruction(new AluInstr(op1_mov, vf.dest(intr->dest, i, pin_chan), tmp, AluInstr::last_write));
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
|
|
ir = new AluInstr(op1_interp_load_p0,
|
|
|
|
|
vf.dest(intr->dest, i, pin_chan),
|
|
|
|
|
new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i),
|
|
|
|
|
AluInstr::write);
|
|
|
|
|
emit_instruction(ir);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
ir->set_alu_flag(alu_last_instr);
|
|
|
|
|
return true;
|
|
|
|
|
return load_input_hw(intr);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool FragmentShader::store_output(nir_intrinsic_instr *intr)
|
|
|
|
@@ -193,22 +169,10 @@ barycentric_ij_index(nir_intrinsic_instr *intr)
|
|
|
|
|
|
|
|
|
|
bool FragmentShader::process_stage_intrinsic(nir_intrinsic_instr *intr)
|
|
|
|
|
{
|
|
|
|
|
auto& vf = value_factory();
|
|
|
|
|
switch (intr->intrinsic) {
|
|
|
|
|
case nir_intrinsic_load_barycentric_centroid:
|
|
|
|
|
case nir_intrinsic_load_barycentric_pixel:
|
|
|
|
|
case nir_intrinsic_load_barycentric_sample: {
|
|
|
|
|
unsigned ij = barycentric_ij_index(intr);
|
|
|
|
|
vf.inject_value(intr->dest, 0, m_interpolator[ij].i);
|
|
|
|
|
vf.inject_value(intr->dest, 1, m_interpolator[ij].j);
|
|
|
|
|
if (process_stage_intrinsic_hw(intr))
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
case nir_intrinsic_load_input:
|
|
|
|
|
return load_input(intr);
|
|
|
|
|
case nir_intrinsic_load_barycentric_at_offset:
|
|
|
|
|
return load_barycentric_at_offset(intr);
|
|
|
|
|
case nir_intrinsic_load_barycentric_at_sample:
|
|
|
|
|
return load_barycentric_at_sample(intr);
|
|
|
|
|
|
|
|
|
|
switch (intr->intrinsic) {
|
|
|
|
|
case nir_intrinsic_load_interpolated_input:
|
|
|
|
|
return load_interpolated_input(intr);
|
|
|
|
|
case nir_intrinsic_discard_if:
|
|
|
|
@@ -250,176 +214,21 @@ bool FragmentShader::load_interpolated_input(nir_intrinsic_instr *intr)
|
|
|
|
|
case VARYING_SLOT_POS:
|
|
|
|
|
for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i)
|
|
|
|
|
vf.inject_value(intr->dest, i, m_pos_input[i]);
|
|
|
|
|
return true;
|
|
|
|
|
return true;
|
|
|
|
|
case VARYING_SLOT_FACE:
|
|
|
|
|
return false;
|
|
|
|
|
default:
|
|
|
|
|
;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
auto param = nir_src_as_const_value(intr->src[1]);
|
|
|
|
|
assert(param && "Indirect PS inputs not (yet) supported");
|
|
|
|
|
|
|
|
|
|
int dest_num_comp = nir_dest_num_components(intr->dest);
|
|
|
|
|
int start_comp = nir_intrinsic_component(intr);
|
|
|
|
|
bool need_temp = start_comp > 0 || !intr->dest.is_ssa;
|
|
|
|
|
|
|
|
|
|
auto dst = need_temp ? vf.temp_vec4(pin_chan) : vf.dest_vec4(intr->dest, pin_chan);
|
|
|
|
|
|
|
|
|
|
InterpolateParams params;
|
|
|
|
|
|
|
|
|
|
params.i = vf.src(intr->src[0], 0);
|
|
|
|
|
params.j = vf.src(intr->src[0], 1);
|
|
|
|
|
params.base = input(nir_intrinsic_base(intr)).lds_pos();
|
|
|
|
|
|
|
|
|
|
if (!load_interpolated(dst, params, dest_num_comp, start_comp))
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
if (need_temp) {
|
|
|
|
|
AluInstr *ir = nullptr;
|
|
|
|
|
for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
|
|
|
|
|
auto real_dst = vf.dest(intr->dest, i, pin_chan);
|
|
|
|
|
ir = new AluInstr(op1_mov, real_dst, dst[i + start_comp], AluInstr::write);
|
|
|
|
|
emit_instruction(ir);
|
|
|
|
|
}
|
|
|
|
|
assert(ir);
|
|
|
|
|
ir->set_alu_flag(alu_last_instr);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
return load_interpolated_input_hw(intr);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool FragmentShader::load_interpolated(RegisterVec4& dest, const InterpolateParams& params,
|
|
|
|
|
int num_dest_comp, int start_comp)
|
|
|
|
|
{
|
|
|
|
|
sfn_log << SfnLog::io << "Using Interpolator (" << *params.j << ", " << *params.i << ")" << "\n";
|
|
|
|
|
|
|
|
|
|
if (num_dest_comp == 1) {
|
|
|
|
|
switch (start_comp) {
|
|
|
|
|
case 0: return load_interpolated_one_comp(dest, params, op2_interp_x);
|
|
|
|
|
case 1: return load_interpolated_two_comp_for_one(dest, params, op2_interp_xy, 0, 1);
|
|
|
|
|
case 2: return load_interpolated_one_comp(dest, params, op2_interp_z);
|
|
|
|
|
case 3: return load_interpolated_two_comp_for_one(dest, params, op2_interp_zw, 2, 3);
|
|
|
|
|
default:
|
|
|
|
|
assert(0);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (num_dest_comp == 2) {
|
|
|
|
|
switch (start_comp) {
|
|
|
|
|
case 0: return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3);
|
|
|
|
|
case 2: return load_interpolated_two_comp(dest, params, op2_interp_zw, 0xc);
|
|
|
|
|
case 1: return load_interpolated_one_comp(dest, params, op2_interp_z) &&
|
|
|
|
|
load_interpolated_two_comp_for_one(dest, params, op2_interp_xy, 0, 1);
|
|
|
|
|
default:
|
|
|
|
|
assert(0);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (num_dest_comp == 3 && start_comp == 0)
|
|
|
|
|
return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3) &&
|
|
|
|
|
load_interpolated_one_comp(dest, params, op2_interp_z);
|
|
|
|
|
|
|
|
|
|
int full_write_mask = ((1 << num_dest_comp) - 1) << start_comp;
|
|
|
|
|
|
|
|
|
|
bool success = load_interpolated_two_comp(dest, params, op2_interp_zw, full_write_mask & 0xc);
|
|
|
|
|
success &= load_interpolated_two_comp(dest, params, op2_interp_xy, full_write_mask & 0x3);
|
|
|
|
|
return success;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool FragmentShader::load_interpolated_one_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op)
|
|
|
|
|
{
|
|
|
|
|
auto group = new AluGroup();
|
|
|
|
|
bool success = true;
|
|
|
|
|
|
|
|
|
|
AluInstr *ir = nullptr;
|
|
|
|
|
for (unsigned i = 0; i < 2 && success; ++i) {
|
|
|
|
|
int chan = i;
|
|
|
|
|
if (op == op2_interp_z)
|
|
|
|
|
chan += 2;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ir = new AluInstr(op, dest[chan],
|
|
|
|
|
i & 1 ? params.j : params.i,
|
|
|
|
|
new InlineConstant(ALU_SRC_PARAM_BASE + params.base, chan),
|
|
|
|
|
i == 0 ? AluInstr::write : AluInstr::last);
|
|
|
|
|
|
|
|
|
|
ir->set_bank_swizzle(alu_vec_210);
|
|
|
|
|
success = group->add_instruction(ir);
|
|
|
|
|
}
|
|
|
|
|
ir->set_alu_flag(alu_last_instr);
|
|
|
|
|
if (success)
|
|
|
|
|
emit_instruction(group);
|
|
|
|
|
return success;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool FragmentShader::load_interpolated_two_comp(RegisterVec4& dest, const InterpolateParams& params, EAluOp op, int writemask)
|
|
|
|
|
{
|
|
|
|
|
auto group = new AluGroup();
|
|
|
|
|
bool success = true;
|
|
|
|
|
|
|
|
|
|
AluInstr *ir = nullptr;
|
|
|
|
|
assert(params.j);
|
|
|
|
|
assert(params.i);
|
|
|
|
|
for (unsigned i = 0; i < 4 ; ++i) {
|
|
|
|
|
ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i,
|
|
|
|
|
new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i),
|
|
|
|
|
(writemask & (1 << i)) ? AluInstr::write : AluInstr::empty);
|
|
|
|
|
ir->set_bank_swizzle(alu_vec_210);
|
|
|
|
|
success = group->add_instruction(ir);
|
|
|
|
|
}
|
|
|
|
|
ir->set_alu_flag(alu_last_instr);
|
|
|
|
|
if (success)
|
|
|
|
|
emit_instruction(group);
|
|
|
|
|
return success;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool FragmentShader::load_interpolated_two_comp_for_one(RegisterVec4& dest, const InterpolateParams& params, EAluOp op,
|
|
|
|
|
UNUSED int start, int comp)
|
|
|
|
|
{
|
|
|
|
|
auto group = new AluGroup();
|
|
|
|
|
bool success = true;
|
|
|
|
|
AluInstr *ir = nullptr;
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < 4 ; ++i) {
|
|
|
|
|
ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i,
|
|
|
|
|
new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i),
|
|
|
|
|
i == comp ? AluInstr::write : AluInstr::empty);
|
|
|
|
|
ir->set_bank_swizzle(alu_vec_210);
|
|
|
|
|
success = group->add_instruction(ir);
|
|
|
|
|
}
|
|
|
|
|
ir->set_alu_flag(alu_last_instr);
|
|
|
|
|
if (success)
|
|
|
|
|
emit_instruction(group);
|
|
|
|
|
|
|
|
|
|
return success;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int FragmentShader::do_allocate_reserved_registers()
|
|
|
|
|
{
|
|
|
|
|
for (unsigned i = 0; i < s_max_interpolators; ++i) {
|
|
|
|
|
if (m_interpolators_used.test(i)) {
|
|
|
|
|
sfn_log << SfnLog::io << "Interpolator " << i << " test enabled\n";
|
|
|
|
|
m_interpolator[i].enabled = true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int num_baryc = 0;
|
|
|
|
|
for (int i = 0; i < 6; ++i) {
|
|
|
|
|
if (m_interpolator[i].enabled) {
|
|
|
|
|
sfn_log << SfnLog::io << "Interpolator " << i << " is enabled with ij=" << num_baryc <<" \n";
|
|
|
|
|
unsigned sel = num_baryc / 2;
|
|
|
|
|
unsigned chan = 2 * (num_baryc % 2);
|
|
|
|
|
|
|
|
|
|
m_interpolator[i].i = value_factory().allocate_pinned_register(sel, chan + 1);
|
|
|
|
|
m_interpolator[i].i->pin_live_range(true, false);
|
|
|
|
|
|
|
|
|
|
m_interpolator[i].j = value_factory().allocate_pinned_register(sel, chan);
|
|
|
|
|
m_interpolator[i].j->pin_live_range(true, false);
|
|
|
|
|
|
|
|
|
|
m_interpolator[i].ij_index = num_baryc++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
int num_baryc = allocate_register_inputs();
|
|
|
|
|
|
|
|
|
|
int next_register = (num_baryc + 1) >> 1;
|
|
|
|
|
|
|
|
|
@@ -658,90 +467,6 @@ bool FragmentShader::scan_input(nir_intrinsic_instr *intr, int index_src_id)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool FragmentShader::load_barycentric_at_sample(nir_intrinsic_instr* instr)
|
|
|
|
|
{
|
|
|
|
|
auto& vf = value_factory();
|
|
|
|
|
RegisterVec4 slope = vf.temp_vec4(pin_group);
|
|
|
|
|
auto src = emit_load_to_register(vf.src(instr->src[0], 0));
|
|
|
|
|
auto fetch = new LoadFromBuffer(slope, {0, 1,2, 3}, src, 0,
|
|
|
|
|
R600_BUFFER_INFO_CONST_BUFFER, nullptr, fmt_32_32_32_32_float);
|
|
|
|
|
|
|
|
|
|
fetch->set_fetch_flag(FetchInstr::srf_mode);
|
|
|
|
|
emit_instruction(fetch);
|
|
|
|
|
|
|
|
|
|
auto grad = vf.temp_vec4(pin_group);
|
|
|
|
|
|
|
|
|
|
auto interpolator = m_interpolator[barycentric_ij_index(instr)];
|
|
|
|
|
assert(interpolator.enabled);
|
|
|
|
|
|
|
|
|
|
RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group);
|
|
|
|
|
|
|
|
|
|
auto tex = new TexInstr(TexInstr::get_gradient_h, grad, {0, 1, 7, 7}, interp, 0, 0);
|
|
|
|
|
tex->set_tex_flag(TexInstr::grad_fine);
|
|
|
|
|
tex->set_tex_flag(TexInstr::x_unnormalized);
|
|
|
|
|
tex->set_tex_flag(TexInstr::y_unnormalized);
|
|
|
|
|
tex->set_tex_flag(TexInstr::z_unnormalized);
|
|
|
|
|
tex->set_tex_flag(TexInstr::w_unnormalized);
|
|
|
|
|
emit_instruction(tex);
|
|
|
|
|
|
|
|
|
|
tex = new TexInstr(TexInstr::get_gradient_v, grad, {7,7,0,1}, interp, 0, 0);
|
|
|
|
|
tex->set_tex_flag(TexInstr::x_unnormalized);
|
|
|
|
|
tex->set_tex_flag(TexInstr::y_unnormalized);
|
|
|
|
|
tex->set_tex_flag(TexInstr::z_unnormalized);
|
|
|
|
|
tex->set_tex_flag(TexInstr::w_unnormalized);
|
|
|
|
|
tex->set_tex_flag(TexInstr::grad_fine);
|
|
|
|
|
emit_instruction(tex);
|
|
|
|
|
|
|
|
|
|
auto tmp0 = vf.temp_register();
|
|
|
|
|
auto tmp1 = vf.temp_register();
|
|
|
|
|
|
|
|
|
|
emit_instruction(new AluInstr(op3_muladd, tmp0, grad[0], slope[2], interpolator.j, {alu_write}));
|
|
|
|
|
emit_instruction(new AluInstr(op3_muladd, tmp1, grad[1], slope[2], interpolator.i, {alu_write, alu_last_instr}));
|
|
|
|
|
|
|
|
|
|
emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), grad[3], slope[3], tmp1, {alu_write}));
|
|
|
|
|
emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, pin_none), grad[2], slope[3], tmp0, {alu_write, alu_last_instr}));
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool FragmentShader::load_barycentric_at_offset(nir_intrinsic_instr* instr)
|
|
|
|
|
{
|
|
|
|
|
auto& vf = value_factory();
|
|
|
|
|
auto interpolator = m_interpolator[barycentric_ij_index(instr)];
|
|
|
|
|
|
|
|
|
|
auto help = vf.temp_vec4(pin_group);
|
|
|
|
|
RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group);
|
|
|
|
|
|
|
|
|
|
auto getgradh = new TexInstr(TexInstr::get_gradient_h, help, {0,1,7,7}, interp, 0, 0);
|
|
|
|
|
getgradh->set_tex_flag(TexInstr::x_unnormalized);
|
|
|
|
|
getgradh->set_tex_flag(TexInstr::y_unnormalized);
|
|
|
|
|
getgradh->set_tex_flag(TexInstr::z_unnormalized);
|
|
|
|
|
getgradh->set_tex_flag(TexInstr::w_unnormalized);
|
|
|
|
|
getgradh->set_tex_flag(TexInstr::grad_fine);
|
|
|
|
|
emit_instruction(getgradh);
|
|
|
|
|
|
|
|
|
|
auto getgradv = new TexInstr(TexInstr::get_gradient_v, help, {7,7,0,1}, interp, 0, 0);
|
|
|
|
|
getgradv->set_tex_flag(TexInstr::x_unnormalized);
|
|
|
|
|
getgradv->set_tex_flag(TexInstr::y_unnormalized);
|
|
|
|
|
getgradv->set_tex_flag(TexInstr::z_unnormalized);
|
|
|
|
|
getgradv->set_tex_flag(TexInstr::w_unnormalized);
|
|
|
|
|
getgradv->set_tex_flag(TexInstr::grad_fine);
|
|
|
|
|
emit_instruction(getgradv);
|
|
|
|
|
|
|
|
|
|
auto ofs_x = vf.src(instr->src[0], 0);
|
|
|
|
|
auto ofs_y = vf.src(instr->src[0], 1);
|
|
|
|
|
auto tmp0 = vf.temp_register();
|
|
|
|
|
auto tmp1 = vf.temp_register();
|
|
|
|
|
emit_instruction(new AluInstr(op3_muladd, tmp0, help[0], ofs_x, interpolator.j, {alu_write}));
|
|
|
|
|
emit_instruction(new AluInstr(op3_muladd, tmp1, help[1], ofs_x, interpolator.i, {alu_write, alu_last_instr}));
|
|
|
|
|
emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), help[3], ofs_y, tmp1, {alu_write}));
|
|
|
|
|
emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, pin_none), help[2], ofs_y, tmp0, {alu_write, alu_last_instr}));
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bool FragmentShader::emit_export_pixel(nir_intrinsic_instr& intr, int num_outputs)
|
|
|
|
|
{
|
|
|
|
|
RegisterVec4::Swizzle swizzle;
|
|
|
|
@@ -890,7 +615,319 @@ void FragmentShader::do_print_properties(std::ostream& os) const
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FragmentShader::Interpolator::Interpolator():
|
|
|
|
|
bool FragmentShaderEG::load_input_hw(nir_intrinsic_instr *intr)
|
|
|
|
|
{
|
|
|
|
|
auto& vf = value_factory();
|
|
|
|
|
auto io = input(nir_intrinsic_base(intr));
|
|
|
|
|
auto comp = nir_intrinsic_component(intr);
|
|
|
|
|
|
|
|
|
|
bool need_temp = comp > 0 || !intr->dest.is_ssa;
|
|
|
|
|
AluInstr *ir = nullptr;
|
|
|
|
|
for (unsigned i = 0; i < nir_dest_num_components(intr->dest) ; ++i) {
|
|
|
|
|
if (need_temp) {
|
|
|
|
|
auto tmp = vf.temp_register(comp + i);
|
|
|
|
|
ir = new AluInstr(op1_interp_load_p0,
|
|
|
|
|
tmp,
|
|
|
|
|
new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i + comp),
|
|
|
|
|
AluInstr::last_write);
|
|
|
|
|
emit_instruction(ir);
|
|
|
|
|
emit_instruction(new AluInstr(op1_mov, vf.dest(intr->dest, i, pin_chan), tmp, AluInstr::last_write));
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
|
|
ir = new AluInstr(op1_interp_load_p0,
|
|
|
|
|
vf.dest(intr->dest, i, pin_chan),
|
|
|
|
|
new InlineConstant(ALU_SRC_PARAM_BASE + io.lds_pos(), i),
|
|
|
|
|
AluInstr::write);
|
|
|
|
|
emit_instruction(ir);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
ir->set_alu_flag(alu_last_instr);
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool FragmentShaderEG::allocate_register_inputs()
|
|
|
|
|
{
|
|
|
|
|
for (unsigned i = 0; i < s_max_interpolators; ++i) {
|
|
|
|
|
if (interpolators_used(i)) {
|
|
|
|
|
sfn_log << SfnLog::io << "Interpolator " << i << " test enabled\n";
|
|
|
|
|
m_interpolator[i].enabled = true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int num_baryc = 0;
|
|
|
|
|
for (int i = 0; i < 6; ++i) {
|
|
|
|
|
if (m_interpolator[i].enabled) {
|
|
|
|
|
sfn_log << SfnLog::io << "Interpolator " << i << " is enabled with ij=" << num_baryc <<" \n";
|
|
|
|
|
unsigned sel = num_baryc / 2;
|
|
|
|
|
unsigned chan = 2 * (num_baryc % 2);
|
|
|
|
|
|
|
|
|
|
m_interpolator[i].i = value_factory().allocate_pinned_register(sel, chan + 1);
|
|
|
|
|
m_interpolator[i].i->pin_live_range(true, false);
|
|
|
|
|
|
|
|
|
|
m_interpolator[i].j = value_factory().allocate_pinned_register(sel, chan);
|
|
|
|
|
m_interpolator[i].j->pin_live_range(true, false);
|
|
|
|
|
|
|
|
|
|
m_interpolator[i].ij_index = num_baryc++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return num_baryc;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool FragmentShaderEG::process_stage_intrinsic_hw(nir_intrinsic_instr *intr)
|
|
|
|
|
{
|
|
|
|
|
auto& vf = value_factory();
|
|
|
|
|
switch (intr->intrinsic) {
|
|
|
|
|
case nir_intrinsic_load_barycentric_centroid:
|
|
|
|
|
case nir_intrinsic_load_barycentric_pixel:
|
|
|
|
|
case nir_intrinsic_load_barycentric_sample: {
|
|
|
|
|
unsigned ij = barycentric_ij_index(intr);
|
|
|
|
|
vf.inject_value(intr->dest, 0, m_interpolator[ij].i);
|
|
|
|
|
vf.inject_value(intr->dest, 1, m_interpolator[ij].j);
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
case nir_intrinsic_load_input:
|
|
|
|
|
return load_input(intr);
|
|
|
|
|
case nir_intrinsic_load_barycentric_at_offset:
|
|
|
|
|
return load_barycentric_at_offset(intr);
|
|
|
|
|
case nir_intrinsic_load_barycentric_at_sample:
|
|
|
|
|
return load_barycentric_at_sample(intr);
|
|
|
|
|
default:
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool FragmentShaderEG::load_interpolated_input_hw(nir_intrinsic_instr *intr)
|
|
|
|
|
{
|
|
|
|
|
auto& vf = value_factory();
|
|
|
|
|
auto param = nir_src_as_const_value(intr->src[1]);
|
|
|
|
|
assert(param && "Indirect PS inputs not (yet) supported");
|
|
|
|
|
|
|
|
|
|
int dest_num_comp = nir_dest_num_components(intr->dest);
|
|
|
|
|
int start_comp = nir_intrinsic_component(intr);
|
|
|
|
|
bool need_temp = start_comp > 0 || !intr->dest.is_ssa;
|
|
|
|
|
|
|
|
|
|
auto dst = need_temp ? vf.temp_vec4(pin_chan) : vf.dest_vec4(intr->dest, pin_chan);
|
|
|
|
|
|
|
|
|
|
InterpolateParams params;
|
|
|
|
|
|
|
|
|
|
params.i = vf.src(intr->src[0], 0);
|
|
|
|
|
params.j = vf.src(intr->src[0], 1);
|
|
|
|
|
params.base = input(nir_intrinsic_base(intr)).lds_pos();
|
|
|
|
|
|
|
|
|
|
if (!load_interpolated(dst, params, dest_num_comp, start_comp))
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
if (need_temp) {
|
|
|
|
|
AluInstr *ir = nullptr;
|
|
|
|
|
for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
|
|
|
|
|
auto real_dst = vf.dest(intr->dest, i, pin_chan);
|
|
|
|
|
ir = new AluInstr(op1_mov, real_dst, dst[i + start_comp], AluInstr::write);
|
|
|
|
|
emit_instruction(ir);
|
|
|
|
|
}
|
|
|
|
|
assert(ir);
|
|
|
|
|
ir->set_alu_flag(alu_last_instr);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool FragmentShaderEG::load_interpolated(RegisterVec4& dest, const InterpolateParams& params,
|
|
|
|
|
int num_dest_comp, int start_comp)
|
|
|
|
|
{
|
|
|
|
|
sfn_log << SfnLog::io << "Using Interpolator (" << *params.j << ", " << *params.i << ")" << "\n";
|
|
|
|
|
|
|
|
|
|
if (num_dest_comp == 1) {
|
|
|
|
|
switch (start_comp) {
|
|
|
|
|
case 0: return load_interpolated_one_comp(dest, params, op2_interp_x);
|
|
|
|
|
case 1: return load_interpolated_two_comp_for_one(dest, params, op2_interp_xy, 1);
|
|
|
|
|
case 2: return load_interpolated_one_comp(dest, params, op2_interp_z);
|
|
|
|
|
case 3: return load_interpolated_two_comp_for_one(dest, params, op2_interp_zw, 3);
|
|
|
|
|
default:
|
|
|
|
|
assert(0);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (num_dest_comp == 2) {
|
|
|
|
|
switch (start_comp) {
|
|
|
|
|
case 0: return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3);
|
|
|
|
|
case 2: return load_interpolated_two_comp(dest, params, op2_interp_zw, 0xc);
|
|
|
|
|
case 1: return load_interpolated_one_comp(dest, params, op2_interp_z) &&
|
|
|
|
|
load_interpolated_two_comp_for_one(dest, params, op2_interp_xy, 1);
|
|
|
|
|
default:
|
|
|
|
|
assert(0);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (num_dest_comp == 3 && start_comp == 0)
|
|
|
|
|
return load_interpolated_two_comp(dest, params, op2_interp_xy, 0x3) &&
|
|
|
|
|
load_interpolated_one_comp(dest, params, op2_interp_z);
|
|
|
|
|
|
|
|
|
|
int full_write_mask = ((1 << num_dest_comp) - 1) << start_comp;
|
|
|
|
|
|
|
|
|
|
bool success = load_interpolated_two_comp(dest, params, op2_interp_zw, full_write_mask & 0xc);
|
|
|
|
|
success &= load_interpolated_two_comp(dest, params, op2_interp_xy, full_write_mask & 0x3);
|
|
|
|
|
return success;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bool FragmentShaderEG::load_barycentric_at_sample(nir_intrinsic_instr* instr)
|
|
|
|
|
{
|
|
|
|
|
auto& vf = value_factory();
|
|
|
|
|
RegisterVec4 slope = vf.temp_vec4(pin_group);
|
|
|
|
|
auto src = emit_load_to_register(vf.src(instr->src[0], 0));
|
|
|
|
|
auto fetch = new LoadFromBuffer(slope, {0, 1,2, 3}, src, 0,
|
|
|
|
|
R600_BUFFER_INFO_CONST_BUFFER, nullptr, fmt_32_32_32_32_float);
|
|
|
|
|
|
|
|
|
|
fetch->set_fetch_flag(FetchInstr::srf_mode);
|
|
|
|
|
emit_instruction(fetch);
|
|
|
|
|
|
|
|
|
|
auto grad = vf.temp_vec4(pin_group);
|
|
|
|
|
|
|
|
|
|
auto interpolator = m_interpolator[barycentric_ij_index(instr)];
|
|
|
|
|
assert(interpolator.enabled);
|
|
|
|
|
|
|
|
|
|
RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group);
|
|
|
|
|
|
|
|
|
|
auto tex = new TexInstr(TexInstr::get_gradient_h, grad, {0, 1, 7, 7}, interp, 0, 0);
|
|
|
|
|
tex->set_tex_flag(TexInstr::grad_fine);
|
|
|
|
|
tex->set_tex_flag(TexInstr::x_unnormalized);
|
|
|
|
|
tex->set_tex_flag(TexInstr::y_unnormalized);
|
|
|
|
|
tex->set_tex_flag(TexInstr::z_unnormalized);
|
|
|
|
|
tex->set_tex_flag(TexInstr::w_unnormalized);
|
|
|
|
|
emit_instruction(tex);
|
|
|
|
|
|
|
|
|
|
tex = new TexInstr(TexInstr::get_gradient_v, grad, {7,7,0,1}, interp, 0, 0);
|
|
|
|
|
tex->set_tex_flag(TexInstr::x_unnormalized);
|
|
|
|
|
tex->set_tex_flag(TexInstr::y_unnormalized);
|
|
|
|
|
tex->set_tex_flag(TexInstr::z_unnormalized);
|
|
|
|
|
tex->set_tex_flag(TexInstr::w_unnormalized);
|
|
|
|
|
tex->set_tex_flag(TexInstr::grad_fine);
|
|
|
|
|
emit_instruction(tex);
|
|
|
|
|
|
|
|
|
|
auto tmp0 = vf.temp_register();
|
|
|
|
|
auto tmp1 = vf.temp_register();
|
|
|
|
|
|
|
|
|
|
emit_instruction(new AluInstr(op3_muladd, tmp0, grad[0], slope[2], interpolator.j, {alu_write}));
|
|
|
|
|
emit_instruction(new AluInstr(op3_muladd, tmp1, grad[1], slope[2], interpolator.i, {alu_write, alu_last_instr}));
|
|
|
|
|
|
|
|
|
|
emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), grad[3], slope[3], tmp1, {alu_write}));
|
|
|
|
|
emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, pin_none), grad[2], slope[3], tmp0, {alu_write, alu_last_instr}));
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool FragmentShaderEG::load_barycentric_at_offset(nir_intrinsic_instr* instr)
|
|
|
|
|
{
|
|
|
|
|
auto& vf = value_factory();
|
|
|
|
|
auto interpolator = m_interpolator[barycentric_ij_index(instr)];
|
|
|
|
|
|
|
|
|
|
auto help = vf.temp_vec4(pin_group);
|
|
|
|
|
RegisterVec4 interp(interpolator.j, interpolator.i, nullptr, nullptr, pin_group);
|
|
|
|
|
|
|
|
|
|
auto getgradh = new TexInstr(TexInstr::get_gradient_h, help, {0,1,7,7}, interp, 0, 0);
|
|
|
|
|
getgradh->set_tex_flag(TexInstr::x_unnormalized);
|
|
|
|
|
getgradh->set_tex_flag(TexInstr::y_unnormalized);
|
|
|
|
|
getgradh->set_tex_flag(TexInstr::z_unnormalized);
|
|
|
|
|
getgradh->set_tex_flag(TexInstr::w_unnormalized);
|
|
|
|
|
getgradh->set_tex_flag(TexInstr::grad_fine);
|
|
|
|
|
emit_instruction(getgradh);
|
|
|
|
|
|
|
|
|
|
auto getgradv = new TexInstr(TexInstr::get_gradient_v, help, {7,7,0,1}, interp, 0, 0);
|
|
|
|
|
getgradv->set_tex_flag(TexInstr::x_unnormalized);
|
|
|
|
|
getgradv->set_tex_flag(TexInstr::y_unnormalized);
|
|
|
|
|
getgradv->set_tex_flag(TexInstr::z_unnormalized);
|
|
|
|
|
getgradv->set_tex_flag(TexInstr::w_unnormalized);
|
|
|
|
|
getgradv->set_tex_flag(TexInstr::grad_fine);
|
|
|
|
|
emit_instruction(getgradv);
|
|
|
|
|
|
|
|
|
|
auto ofs_x = vf.src(instr->src[0], 0);
|
|
|
|
|
auto ofs_y = vf.src(instr->src[0], 1);
|
|
|
|
|
auto tmp0 = vf.temp_register();
|
|
|
|
|
auto tmp1 = vf.temp_register();
|
|
|
|
|
emit_instruction(new AluInstr(op3_muladd, tmp0, help[0], ofs_x, interpolator.j, {alu_write}));
|
|
|
|
|
emit_instruction(new AluInstr(op3_muladd, tmp1, help[1], ofs_x, interpolator.i, {alu_write, alu_last_instr}));
|
|
|
|
|
emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 0, pin_none), help[3], ofs_y, tmp1, {alu_write}));
|
|
|
|
|
emit_instruction(new AluInstr(op3_muladd, vf.dest(instr->dest, 1, pin_none), help[2], ofs_y, tmp0, {alu_write, alu_last_instr}));
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool FragmentShaderEG::load_interpolated_one_comp(RegisterVec4& dest,
|
|
|
|
|
const InterpolateParams& params,
|
|
|
|
|
EAluOp op)
|
|
|
|
|
{
|
|
|
|
|
auto group = new AluGroup();
|
|
|
|
|
bool success = true;
|
|
|
|
|
|
|
|
|
|
AluInstr *ir = nullptr;
|
|
|
|
|
for (unsigned i = 0; i < 2 && success; ++i) {
|
|
|
|
|
int chan = i;
|
|
|
|
|
if (op == op2_interp_z)
|
|
|
|
|
chan += 2;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ir = new AluInstr(op, dest[chan],
|
|
|
|
|
i & 1 ? params.j : params.i,
|
|
|
|
|
new InlineConstant(ALU_SRC_PARAM_BASE + params.base, chan),
|
|
|
|
|
i == 0 ? AluInstr::write : AluInstr::last);
|
|
|
|
|
|
|
|
|
|
ir->set_bank_swizzle(alu_vec_210);
|
|
|
|
|
success = group->add_instruction(ir);
|
|
|
|
|
}
|
|
|
|
|
ir->set_alu_flag(alu_last_instr);
|
|
|
|
|
if (success)
|
|
|
|
|
emit_instruction(group);
|
|
|
|
|
return success;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool FragmentShaderEG::load_interpolated_two_comp(RegisterVec4& dest,
|
|
|
|
|
const InterpolateParams& params,
|
|
|
|
|
EAluOp op, int writemask)
|
|
|
|
|
{
|
|
|
|
|
auto group = new AluGroup();
|
|
|
|
|
bool success = true;
|
|
|
|
|
|
|
|
|
|
AluInstr *ir = nullptr;
|
|
|
|
|
assert(params.j);
|
|
|
|
|
assert(params.i);
|
|
|
|
|
for (unsigned i = 0; i < 4 ; ++i) {
|
|
|
|
|
ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i,
|
|
|
|
|
new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i),
|
|
|
|
|
(writemask & (1 << i)) ? AluInstr::write : AluInstr::empty);
|
|
|
|
|
ir->set_bank_swizzle(alu_vec_210);
|
|
|
|
|
success = group->add_instruction(ir);
|
|
|
|
|
}
|
|
|
|
|
ir->set_alu_flag(alu_last_instr);
|
|
|
|
|
if (success)
|
|
|
|
|
emit_instruction(group);
|
|
|
|
|
return success;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool FragmentShaderEG::load_interpolated_two_comp_for_one(RegisterVec4& dest,
|
|
|
|
|
const InterpolateParams& params, EAluOp op,
|
|
|
|
|
int comp)
|
|
|
|
|
{
|
|
|
|
|
auto group = new AluGroup();
|
|
|
|
|
bool success = true;
|
|
|
|
|
AluInstr *ir = nullptr;
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < 4 ; ++i) {
|
|
|
|
|
ir = new AluInstr(op, dest[i], i & 1 ? params.j : params.i,
|
|
|
|
|
new InlineConstant(ALU_SRC_PARAM_BASE + params.base, i),
|
|
|
|
|
i == comp ? AluInstr::write : AluInstr::empty);
|
|
|
|
|
ir->set_bank_swizzle(alu_vec_210);
|
|
|
|
|
success = group->add_instruction(ir);
|
|
|
|
|
}
|
|
|
|
|
ir->set_alu_flag(alu_last_instr);
|
|
|
|
|
if (success)
|
|
|
|
|
emit_instruction(group);
|
|
|
|
|
|
|
|
|
|
return success;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FragmentShaderEG::Interpolator::Interpolator():
|
|
|
|
|
enabled(false)
|
|
|
|
|
{
|
|
|
|
|
}
|
|
|
|
|