r600g/sb: SB support for UBO indexing
Signed-off-by: Glenn Kennard <glenn.kennard@gmail.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
committed by
Dave Airlie
parent
80c5062abf
commit
1befb7ed98
@@ -166,8 +166,6 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
|
||||
if (rctx->b.chip_class <= R700) {
|
||||
use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_GEOMETRY);
|
||||
}
|
||||
/* disable SB for shaders using ubo array indexing as it doesn't handle those currently */
|
||||
use_sb &= !shader->shader.uses_ubo_indexing;
|
||||
/* disable SB for shaders using doubles */
|
||||
use_sb &= !shader->shader.uses_doubles;
|
||||
|
||||
@@ -1250,9 +1248,6 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ctx->src[i].kc_rel)
|
||||
ctx->shader->uses_ubo_indexing = true;
|
||||
|
||||
if (ctx->src[i].rel) {
|
||||
int chan = inst->Src[i].Indirect.Swizzle;
|
||||
int treg = r600_get_temp(ctx);
|
||||
@@ -1936,7 +1931,6 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
|
||||
ctx.gs_next_vertex = 0;
|
||||
ctx.gs_stream_output_info = &so;
|
||||
|
||||
shader->uses_ubo_indexing = false;
|
||||
ctx.face_gpr = -1;
|
||||
ctx.fixed_pt_position_gpr = -1;
|
||||
ctx.fragcoord_input = -1;
|
||||
|
||||
@@ -75,8 +75,6 @@ struct r600_shader {
|
||||
boolean has_txq_cube_array_z_comp;
|
||||
boolean uses_tex_buffers;
|
||||
boolean gs_prim_id_input;
|
||||
/* Temporarily workaround SB not handling ubo indexing */
|
||||
boolean uses_ubo_indexing;
|
||||
|
||||
/* Size in bytes of a data item in the ring(s) (single vertex data).
|
||||
Stages with only one ring will have items 1, 2 and 3 set to 0. */
|
||||
|
||||
@@ -478,7 +478,9 @@ struct bc_cf {
|
||||
|
||||
bool is_alu_extended() {
|
||||
assert(op_ptr->flags & CF_ALU);
|
||||
return kc[2].mode != KC_LOCK_NONE || kc[3].mode != KC_LOCK_NONE;
|
||||
return kc[2].mode != KC_LOCK_NONE || kc[3].mode != KC_LOCK_NONE ||
|
||||
kc[0].index_mode != KC_INDEX_NONE || kc[1].index_mode != KC_INDEX_NONE ||
|
||||
kc[2].index_mode != KC_INDEX_NONE || kc[3].index_mode != KC_INDEX_NONE;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
@@ -515,7 +515,7 @@ void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg
|
||||
|
||||
void bc_finalizer::emit_set_grad(fetch_node* f) {
|
||||
|
||||
assert(f->src.size() == 12);
|
||||
assert(f->src.size() == 12 || f->src.size() == 13);
|
||||
unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };
|
||||
|
||||
unsigned arg_start = 0;
|
||||
@@ -810,8 +810,8 @@ void bc_finalizer::finalize_cf(cf_node* c) {
|
||||
}
|
||||
|
||||
sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
|
||||
unsigned sel = v->select.sel();
|
||||
unsigned bank = sel >> 12;
|
||||
unsigned sel = v->select.kcache_sel();
|
||||
unsigned bank = v->select.kcache_bank();
|
||||
unsigned chan = v->select.chan();
|
||||
static const unsigned kc_base[] = {128, 160, 256, 288};
|
||||
|
||||
|
||||
@@ -338,6 +338,7 @@ void bc_parser::save_set_cf_index(value *val, unsigned idx)
|
||||
// Returns cf_index_value[idx]. Only idx 0 and 1 are accepted, and the stored
// entry must be non-null; both conditions are asserted.
value *bc_parser::get_cf_index_value(unsigned idx)
{
	assert(idx <= 1);

	value *const stored = cf_index_value[idx];
	assert(stored);

	return stored;
}
|
||||
void bc_parser::save_mova(alu_node *mova)
|
||||
@@ -361,6 +362,7 @@ int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {
|
||||
for (node_iterator I = g->begin(), E = g->end();
|
||||
I != E; ++I) {
|
||||
n = static_cast<alu_node*>(*I);
|
||||
bool ubo_indexing[2] = {};
|
||||
|
||||
if (!sh->assign_slot(n, slots[cgroup])) {
|
||||
assert(!"alu slot assignment failed");
|
||||
@@ -460,7 +462,12 @@ int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {
|
||||
|
||||
bc_kcache &kc = cf->bc.kc[kc_set];
|
||||
kc_addr = (kc.addr << 4) + (sel & 0x1F);
|
||||
n->src[s] = sh->get_kcache_value(kc.bank, kc_addr, src.chan);
|
||||
n->src[s] = sh->get_kcache_value(kc.bank, kc_addr, src.chan, (alu_kcache_index_mode)kc.index_mode);
|
||||
|
||||
if (kc.index_mode != KC_INDEX_NONE) {
	// kc.index_mode carries a kcache index mode, so the value to rule out
	// is KC_INDEX_INVALID, not a lock-mode constant. Only KC_INDEX_0 and
	// KC_INDEX_1 map onto the two ubo_indexing slots below.
	assert(kc.index_mode != KC_INDEX_INVALID);
	ubo_indexing[kc.index_mode - KC_INDEX_0] = true;
}
|
||||
} else if (src.sel < MAX_GPR) {
|
||||
value *v = sh->get_gpr_value(true, src.sel, src.chan, src.rel);
|
||||
|
||||
@@ -497,6 +504,15 @@ int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// add UBO index values if any as dependencies
|
||||
if (ubo_indexing[0]) {
|
||||
n->src.push_back(get_cf_index_value(0));
|
||||
}
|
||||
if (ubo_indexing[1]) {
|
||||
n->src.push_back(get_cf_index_value(1));
|
||||
}
|
||||
|
||||
if ((n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX0 || n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1) &&
|
||||
ctx.is_cayman())
|
||||
// Move CF_IDX value into tex instruction operands, scheduler will later re-emit setting of CF_IDX
|
||||
@@ -644,6 +660,9 @@ int bc_parser::prepare_fetch_clause(cf_node *cf) {
|
||||
if (n->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE) {
|
||||
n->src.push_back(get_cf_index_value(n->bc.sampler_index_mode == V_SQ_CF_INDEX_1));
|
||||
}
|
||||
if (n->bc.resource_index_mode != V_SQ_CF_INDEX_NONE) {
|
||||
n->src.push_back(get_cf_index_value(n->bc.resource_index_mode == V_SQ_CF_INDEX_1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -403,7 +403,8 @@ bool expr_handler::fold_alu_op1(alu_node& n) {
|
||||
if ((n.bc.op == ALU_OP1_MOV || n.bc.op == ALU_OP1_MOVA_INT ||
|
||||
n.bc.op == ALU_OP1_MOVA_GPR_INT)
|
||||
&& n.bc.clamp == 0 && n.bc.omod == 0
|
||||
&& n.bc.src[0].abs == 0 && n.bc.src[0].neg == 0) {
|
||||
&& n.bc.src[0].abs == 0 && n.bc.src[0].neg == 0 &&
|
||||
n.src.size() == 1 /* RIM/SIM can be appended as additional values */) {
|
||||
assign_source(n.dst[0], v0);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -62,6 +62,13 @@ struct sel_chan
|
||||
|
||||
// Decodes the selector part of a packed id: (idx - 1) with the low two
// (channel) bits dropped.
static unsigned sel(unsigned idx) {
	const unsigned packed = idx - 1;
	return packed / 4u;
}
|
||||
// Decodes the channel part of a packed id: the low two bits of (idx - 1).
static unsigned chan(unsigned idx) {
	const unsigned packed = idx - 1;
	return packed % 4u;
}
|
||||
|
||||
// Builds a kcache selector. The selector word is (bank << 12) | index with
// index_mode placed at bit 28 and up; it is combined with chan by delegating
// to the two-argument (sel, chan) constructor and copying the resulting id.
sel_chan(unsigned bank, unsigned index,
|
||||
unsigned chan, alu_kcache_index_mode index_mode)
|
||||
: id(sel_chan((bank << 12) | index | ((unsigned)index_mode << 28), chan).id) {}
|
||||
// Index mode of a kcache selector: the bits of sel() from bit 28 upward.
unsigned kcache_index_mode() const { return sel() >> 28; }
|
||||
// Kcache selector with the index-mode bits (28 and up) masked off.
unsigned kcache_sel() const { return sel() & 0x0fffffffu; }
|
||||
// Kcache bank: the masked selector (kcache_sel()) shifted right by 12.
unsigned kcache_bank() const { return kcache_sel() >> 12; }
|
||||
};
|
||||
|
||||
inline sb_ostream& operator <<(sb_ostream& o, sel_chan r) {
|
||||
|
||||
@@ -843,7 +843,7 @@ static alu_node *create_set_idx(shader &sh, unsigned ar_idx) {
|
||||
a->dst.resize(1); // Dummy needed for recolor
|
||||
|
||||
PSC_DUMP(
|
||||
sblog << "created IDX load: "
|
||||
sblog << "created IDX load: ";
|
||||
dump::dump_op(a);
|
||||
sblog << "\n";
|
||||
);
|
||||
@@ -909,15 +909,21 @@ void post_scheduler::process_fetch(container_node *c) {
|
||||
sblog << " ";
|
||||
);
|
||||
|
||||
if (f->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE) {
|
||||
// TODO: If same values used can avoid reloading index register
|
||||
if (f->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE ||
|
||||
f->bc.resource_index_mode != V_SQ_CF_INDEX_NONE) {
|
||||
unsigned index_mode = f->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE ?
|
||||
f->bc.sampler_index_mode : f->bc.resource_index_mode;
|
||||
|
||||
// Currently require prior opt passes to use one TEX per indexed op
|
||||
assert(f->parent->count() == 1);
|
||||
|
||||
value *v = f->src.back(); // Last src is index offset
|
||||
assert(v);
|
||||
|
||||
cur_bb->push_front(c);
|
||||
|
||||
load_index_register(v, f->bc.sampler_index_mode);
|
||||
load_index_register(v, index_mode);
|
||||
f->src.pop_back(); // Don't need index value any more
|
||||
|
||||
return;
|
||||
@@ -959,6 +965,7 @@ void post_scheduler::process_alu(container_node *c) {
|
||||
|
||||
if (uc) {
|
||||
n->remove();
|
||||
|
||||
pending.push_back(n);
|
||||
PSC_DUMP( sblog << "pending\n"; );
|
||||
} else {
|
||||
@@ -1101,6 +1108,18 @@ void post_scheduler::init_globals(val_set &s, bool prealloc) {
|
||||
}
|
||||
}
|
||||
|
||||
void post_scheduler::emit_index_registers() {
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
if (alu.current_idx[i]) {
|
||||
regmap = prev_regmap;
|
||||
alu.discard_current_group();
|
||||
|
||||
load_index_register(alu.current_idx[i], KC_INDEX_0 + i);
|
||||
alu.current_idx[i] = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void post_scheduler::emit_clause() {
|
||||
|
||||
if (alu.current_ar) {
|
||||
@@ -1109,7 +1128,11 @@ void post_scheduler::emit_clause() {
|
||||
alu.emit_group();
|
||||
}
|
||||
|
||||
alu.emit_clause(cur_bb);
|
||||
if (!alu.is_empty()) {
|
||||
alu.emit_clause(cur_bb);
|
||||
}
|
||||
|
||||
emit_index_registers();
|
||||
}
|
||||
|
||||
void post_scheduler::schedule_alu(container_node *c) {
|
||||
@@ -1121,6 +1144,14 @@ void post_scheduler::schedule_alu(container_node *c) {
|
||||
prev_regmap = regmap;
|
||||
|
||||
if (!prepare_alu_group()) {
|
||||
if (alu.current_idx[0] || alu.current_idx[1]) {
|
||||
regmap = prev_regmap;
|
||||
emit_clause();
|
||||
init_globals(live, false);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if (alu.current_ar) {
|
||||
emit_load_ar();
|
||||
continue;
|
||||
@@ -1132,6 +1163,7 @@ void post_scheduler::schedule_alu(container_node *c) {
|
||||
regmap = prev_regmap;
|
||||
emit_clause();
|
||||
init_globals(live, false);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -1391,6 +1423,42 @@ bool post_scheduler::map_src_val(value *v) {
|
||||
}
|
||||
|
||||
bool post_scheduler::map_src_vec(vvec &vv, bool src) {
|
||||
if (src) {
|
||||
// Handle possible UBO indexing
|
||||
bool ubo_indexing[2] = { false, false };
|
||||
for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
|
||||
value *v = *I;
|
||||
if (!v)
|
||||
continue;
|
||||
|
||||
if (v->is_kcache()) {
|
||||
unsigned index_mode = v->select.kcache_index_mode();
|
||||
if (index_mode == KC_INDEX_0 || index_mode == KC_INDEX_1) {
|
||||
ubo_indexing[index_mode - KC_INDEX_0] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// idx values stored at end of src vec, see bc_parser::prepare_alu_group
|
||||
for (unsigned i = 2; i != 0; i--) {
|
||||
if (ubo_indexing[i-1]) {
|
||||
// TODO: skip adding value to kcache reservation somehow, causes
|
||||
// unnecessary group breaks and cache line locks
|
||||
value *v = vv.back();
|
||||
if (alu.current_idx[i-1] && alu.current_idx[i-1] != v) {
|
||||
PSC_DUMP(
|
||||
sblog << "IDX" << i-1 << " already set to " <<
|
||||
*alu.current_idx[i-1] << ", trying to set " << *v << "\n";
|
||||
);
|
||||
return false;
|
||||
}
|
||||
|
||||
alu.current_idx[i-1] = v;
|
||||
PSC_DUMP(sblog << "IDX" << i-1 << " set to " << *v << "\n";);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
|
||||
value *v = *I;
|
||||
if (!v)
|
||||
@@ -1456,6 +1524,10 @@ void post_scheduler::dump_regmap() {
|
||||
sblog << " current_AR: " << *alu.current_ar << "\n";
|
||||
if (alu.current_pr)
|
||||
sblog << " current_PR: " << *alu.current_pr << "\n";
|
||||
if (alu.current_idx[0])
|
||||
sblog << " current IDX0: " << *alu.current_idx[0] << "\n";
|
||||
if (alu.current_idx[1])
|
||||
sblog << " current IDX1: " << *alu.current_idx[1] << "\n";
|
||||
}
|
||||
|
||||
void post_scheduler::recolor_locals() {
|
||||
@@ -1545,6 +1617,13 @@ unsigned post_scheduler::try_add_instruction(node *n) {
|
||||
|
||||
unsigned avail_slots = rt.avail_slots();
|
||||
|
||||
// Cannot schedule in same clause as instructions using this index value
|
||||
if (!n->dst.empty() && n->dst[0] &&
|
||||
(n->dst[0] == alu.current_idx[0] || n->dst[0] == alu.current_idx[1])) {
|
||||
PSC_DUMP(sblog << " CF_IDX source: " << *n->dst[0] << "\n";);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (n->is_alu_packed()) {
|
||||
alu_packed_node *p = static_cast<alu_packed_node*>(n);
|
||||
unsigned slots = p->get_slot_mask();
|
||||
@@ -1874,7 +1953,7 @@ alu_clause_tracker::alu_clause_tracker(shader &sh)
|
||||
grp0(sh), grp1(sh),
|
||||
group(), clause(),
|
||||
push_exec_mask(),
|
||||
current_ar(), current_pr() {}
|
||||
current_ar(), current_pr(), current_idx() {}
|
||||
|
||||
void alu_clause_tracker::emit_group() {
|
||||
|
||||
@@ -1931,6 +2010,8 @@ bool alu_clause_tracker::check_clause_limits() {
|
||||
|
||||
// reserving slots to load AR and PR values
|
||||
unsigned reserve_slots = (current_ar ? 1 : 0) + (current_pr ? 1 : 0);
|
||||
// ...and index registers
|
||||
reserve_slots += (current_idx[0] != NULL) + (current_idx[1] != NULL);
|
||||
|
||||
if (slot_count + slots > MAX_ALU_SLOTS - reserve_slots)
|
||||
return false;
|
||||
@@ -1996,13 +2077,15 @@ unsigned rp_kcache_tracker::get_lines(kc_lines& lines) {
|
||||
unsigned cnt = 0;
|
||||
|
||||
for (unsigned i = 0; i < sel_count; ++i) {
|
||||
unsigned line = rp[i];
|
||||
unsigned line = rp[i] & 0x1fffffffu;
|
||||
unsigned index_mode = rp[i] >> 29;
|
||||
|
||||
if (!line)
|
||||
return cnt;
|
||||
|
||||
--line;
|
||||
line = (sel_count == 2) ? line >> 5 : line >> 6;
|
||||
line |= index_mode << 29;
|
||||
|
||||
if (lines.insert(line).second)
|
||||
++cnt;
|
||||
@@ -2017,14 +2100,18 @@ bool alu_kcache_tracker::update_kc() {
|
||||
memcpy(old_kc, kc, sizeof(kc));
|
||||
|
||||
for (kc_lines::iterator I = lines.begin(), E = lines.end(); I != E; ++I) {
|
||||
unsigned line = *I;
|
||||
unsigned index_mode = *I >> 29;
|
||||
unsigned line = *I & 0x1fffffffu;
|
||||
unsigned bank = line >> 8;
|
||||
|
||||
assert(index_mode <= KC_INDEX_INVALID);
|
||||
line &= 0xFF;
|
||||
|
||||
if (c && (bank == kc[c-1].bank) && (kc[c-1].addr + 1 == line))
|
||||
++kc[c-1].mode;
|
||||
else {
|
||||
if (c && (bank == kc[c-1].bank) && (kc[c-1].addr + 1 == line) &&
|
||||
kc[c-1].index_mode == index_mode)
|
||||
{
|
||||
kc[c-1].mode = KC_LOCK_2;
|
||||
} else {
|
||||
if (c == max_kcs) {
|
||||
memcpy(kc, old_kc, sizeof(kc));
|
||||
return false;
|
||||
@@ -2034,6 +2121,7 @@ bool alu_kcache_tracker::update_kc() {
|
||||
|
||||
kc[c].bank = bank;
|
||||
kc[c].addr = line;
|
||||
kc[c].index_mode = index_mode;
|
||||
++c;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -66,6 +66,7 @@ public:
|
||||
class literal_tracker {
|
||||
literal lt[4];
|
||||
unsigned uc[4];
|
||||
|
||||
public:
|
||||
literal_tracker() : lt(), uc() {}
|
||||
|
||||
@@ -219,6 +220,8 @@ public:
|
||||
// bottom-up)
|
||||
value *current_ar;
|
||||
value *current_pr;
|
||||
// current values of CF_IDX registers that need preloading
|
||||
value *current_idx[2];
|
||||
|
||||
alu_clause_tracker(shader &sh);
|
||||
|
||||
@@ -256,6 +259,7 @@ class post_scheduler : public pass {
|
||||
|
||||
val_set cleared_interf;
|
||||
|
||||
void emit_index_registers();
|
||||
public:
|
||||
|
||||
post_scheduler(shader &sh) : pass(sh),
|
||||
|
||||
@@ -188,9 +188,9 @@ value* shader::create_temp_value() {
|
||||
return get_value(VLK_TEMP, id, 0);
|
||||
}
|
||||
|
||||
// Looks up (via get_ro_value on kcache_values) the VLK_KCACHE value for the
// given bank, index and channel. index_mode is folded into the sel_chan key,
// so the same constant accessed with different index modes yields distinct
// keys.
value* shader::get_kcache_value(unsigned bank, unsigned index, unsigned chan, alu_kcache_index_mode index_mode) {
	return get_ro_value(kcache_values, VLK_KCACHE,
			sel_chan(bank, index, chan, index_mode));
}
|
||||
|
||||
void shader::add_input(unsigned gpr, bool preloaded, unsigned comp_mask) {
|
||||
|
||||
@@ -323,7 +323,7 @@ public:
|
||||
|
||||
|
||||
value* get_special_ro_value(unsigned sel);
|
||||
value* get_kcache_value(unsigned bank, unsigned index, unsigned chan);
|
||||
value* get_kcache_value(unsigned bank, unsigned index, unsigned chan, alu_kcache_index_mode index_mode);
|
||||
|
||||
value* get_value_version(value* v, unsigned ver);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user