freedreno/ir3/ra: add def/use iterators

Decouple the messy logic of figuring out vreg names defined/used by an
instruction from the logic of what to do about it by introducing
iterators.  There is still *some* array vs ssa special casing in
ra_block_compute_live_ranges(), but less than before.  And this will
avoid introducing a second copy of the def/use logic in a following
patch which uses the liveranges to calculate the maximum # of live
values (which is the optimal target for max physical register window
to round-robin within).

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4272>
This commit is contained in:
Rob Clark
2020-03-21 14:44:44 -07:00
committed by Marge Bot
parent bf0aa7ed90
commit 6347c2ea89
2 changed files with 201 additions and 132 deletions
+42 -132
View File
@@ -585,159 +585,69 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
}
foreach_instr (instr, &block->instr_list) {
struct ir3_instruction *src;
struct ir3_register *reg;
if (writes_gpr(instr)) {
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
struct ir3_register *dst = instr->regs[0];
if (dst->flags & IR3_REG_ARRAY) {
struct ir3_array *arr =
ir3_lookup_array(ctx->ir, dst->array.id);
unsigned i;
foreach_def (name, ctx, instr) {
if (name_is_array(ctx, name)) {
struct ir3_array *arr = name_to_array(ctx, name);
arr->start_ip = MIN2(arr->start_ip, instr->ip);
arr->end_ip = MAX2(arr->end_ip, instr->ip);
/* set the node class now.. in case we don't encounter
* this array dst again. From register_alloc algo's
* perspective, these are all single/scalar regs:
*/
for (i = 0; i < arr->length; i++) {
for (unsigned i = 0; i < arr->length; i++) {
unsigned name = arr->base + i;
if(arr->half)
ra_set_node_class(ctx->g, name, ctx->set->half_classes[0]);
else
ra_set_node_class(ctx->g, name, ctx->set->classes[0]);
}
/* indirect write is treated like a write to all array
* elements, since we don't know which one is actually
* written:
*/
if (dst->flags & IR3_REG_RELATIV) {
for (i = 0; i < arr->length; i++) {
unsigned name = arr->base + i;
def(name, instr);
}
} else {
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
if (is_high(instr)) {
ra_set_node_class(ctx->g, name,
ctx->set->high_classes[id->cls - HIGH_OFFSET]);
} else if (is_half(instr)) {
ra_set_node_class(ctx->g, name,
ctx->set->half_classes[id->cls - HALF_OFFSET]);
} else {
unsigned name = arr->base + dst->array.offset;
def(name, instr);
}
} else if (id->defn == instr) {
/* in scalar pass, we aren't considering virtual register
* classes, ie. if an instruction writes a vec2, then it
* defines two different scalar register names.
*/
unsigned n = ctx->scalar_pass ? dest_regs(instr) : 1;
for (unsigned i = 0; i < n; i++) {
unsigned name = scalar_name(ctx, instr, i);
/* split/collect instructions have duplicate names
* as real instructions, so they skip the hashtable:
*/
if (ctx->name_to_instr && !((instr->opc == OPC_META_SPLIT) ||
(instr->opc == OPC_META_COLLECT))) {
/* this is slightly annoying, we can't just use an
* integer on the stack
*/
unsigned *key = ralloc(ctx->name_to_instr, unsigned);
*key = name;
debug_assert(!_mesa_hash_table_search(ctx->name_to_instr, key));
_mesa_hash_table_insert(ctx->name_to_instr, key, instr);
}
/* tex instructions actually have a wrmask, and
* don't touch masked out components. We can't do
* anything useful about that in the first pass,
* but in the scalar pass we can realize these
* registers are available:
*/
if (ctx->scalar_pass && is_tex_or_prefetch(instr) &&
!(instr->regs[0]->wrmask & (1 << i)))
continue;
def(name, instr);
if ((instr->opc == OPC_META_INPUT) && first_non_input)
use(name, first_non_input);
if (is_high(instr)) {
ra_set_node_class(ctx->g, name,
ctx->set->high_classes[id->cls - HIGH_OFFSET]);
} else if (is_half(instr)) {
ra_set_node_class(ctx->g, name,
ctx->set->half_classes[id->cls - HALF_OFFSET]);
} else {
ra_set_node_class(ctx->g, name,
ctx->set->classes[id->cls]);
}
ra_set_node_class(ctx->g, name,
ctx->set->classes[id->cls]);
}
}
def(name, instr);
if ((instr->opc == OPC_META_INPUT) && first_non_input)
use(name, first_non_input);
}
foreach_src (reg, instr) {
if (reg->flags & IR3_REG_ARRAY) {
struct ir3_array *arr =
ir3_lookup_array(ctx->ir, reg->array.id);
foreach_use (name, ctx, instr) {
if (name_is_array(ctx, name)) {
struct ir3_array *arr = name_to_array(ctx, name);
arr->start_ip = MIN2(arr->start_ip, instr->ip);
arr->end_ip = MAX2(arr->end_ip, instr->ip);
/* indirect read is treated like a read from all array
* elements, since we don't know which one is actually
* read:
/* NOTE: arrays are not SSA so unconditionally
* set use bit:
*/
if (reg->flags & IR3_REG_RELATIV) {
unsigned i;
for (i = 0; i < arr->length; i++) {
unsigned name = arr->base + i;
use(name, instr);
BITSET_SET(bd->use, name);
}
} else {
unsigned name = arr->base + reg->array.offset;
use(name, instr);
/* NOTE: arrays are not SSA so unconditionally
* set use bit:
*/
BITSET_SET(bd->use, name);
debug_assert(reg->array.offset < arr->length);
}
} else if (ctx->scalar_pass) {
struct ir3_instruction *src = reg->instr;
/* skip things that aren't SSA: */
unsigned n = src ? dest_regs(src) : 0;
BITSET_SET(bd->use, name);
}
/* in scalar pass, we aren't considering virtual register
* classes, ie. if an instruction writes a vec2, then it
* defines two different scalar register names.
*
* We need to traverse up thru collect/split to find the
* actual non-meta instruction names for each of the
* components:
use(name, instr);
}
foreach_name (name, ctx, instr) {
/* split/collect instructions have duplicate names
* as real instructions, so they skip the hashtable:
*/
if (ctx->name_to_instr && !((instr->opc == OPC_META_SPLIT) ||
(instr->opc == OPC_META_COLLECT))) {
/* this is slightly annoying, we can't just use an
* integer on the stack
*/
for (unsigned i = 0; i < n; i++) {
/* Need to filter out a couple special cases, ie.
* writes to a0.x or p0.x:
*/
if (!writes_gpr(src))
continue;
/* split takes a src w/ wrmask potentially greater
* than 0x1, but it really only cares about a single
* component. This shows up in splits coming out of
* a tex instruction w/ wrmask=.z, for example.
*/
if ((instr->opc == OPC_META_SPLIT) &&
!(i == instr->split.off))
continue;
use(scalar_name(ctx, src, i), instr);
}
} else if ((src = ssa(reg)) && writes_gpr(src)) {
unsigned name = ra_name(ctx, &ctx->instrd[src->ip]);
use(name, instr);
unsigned *key = ralloc(ctx->name_to_instr, unsigned);
*key = name;
debug_assert(!_mesa_hash_table_search(ctx->name_to_instr, key));
_mesa_hash_table_insert(ctx->name_to_instr, key, instr);
}
}
}
+159
View File
@@ -134,6 +134,18 @@ struct ir3_ra_ctx {
/* Tracking for select_reg callback */
unsigned start_search_reg;
unsigned max_target;
/* Temporary buffer for def/use iterators
*
* The worst case should probably be an array w/ relative access (ie.
* all elements are def'd or use'd), and that can't be larger than
* the number of registers.
*
* NOTE we could declare this on the stack if needed, but I don't
* think there is a need for nested iterators.
*/
unsigned namebuf[NUM_REGS];
unsigned namecnt, nameidx;
};
static inline int
@@ -182,6 +194,153 @@ writes_gpr(struct ir3_instruction *instr)
return true;
}
#define NO_NAME ~0
/*
* Iterators to iterate the vreg names of an instructions def's and use's
*/
static inline unsigned
__ra_name_cnt(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
{
if (!instr)
return 0;
/* Filter special cases, ie. writes to a0.x or p0.x, or non-ssa: */
if (!writes_gpr(instr) || (instr->regs[0]->flags & IR3_REG_ARRAY))
return 0;
/* in scalar pass, we aren't considering virtual register classes, ie.
* if an instruction writes a vec2, then it defines two different scalar
* register names.
*/
if (ctx->scalar_pass)
return dest_regs(instr);
return 1;
}
#define foreach_name_n(__name, __n, __ctx, __instr) \
for (unsigned __cnt = __ra_name_cnt(__ctx, __instr), __n = 0, __name; \
(__n < __cnt) && ({__name = scalar_name(__ctx, __instr, __n); 1;}); __n++)
#define foreach_name(__name, __ctx, __instr) \
foreach_name_n(__name, __n, __ctx, __instr)
static inline unsigned
__ra_itr_pop(struct ir3_ra_ctx *ctx)
{
if (ctx->nameidx < ctx->namecnt)
return ctx->namebuf[ctx->nameidx++];
return NO_NAME;
}
static inline void
__ra_itr_push(struct ir3_ra_ctx *ctx, unsigned name)
{
assert(ctx->namecnt < ARRAY_SIZE(ctx->namebuf));
ctx->namebuf[ctx->namecnt++] = name;
}
static inline unsigned
__ra_init_def_itr(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
{
/* nested use is not supported: */
assert(ctx->namecnt == ctx->nameidx);
ctx->namecnt = ctx->nameidx = 0;
if (!writes_gpr(instr))
return NO_NAME;
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
struct ir3_register *dst = instr->regs[0];
if (dst->flags & IR3_REG_ARRAY) {
struct ir3_array *arr = ir3_lookup_array(ctx->ir, dst->array.id);
/* indirect write is treated like a write to all array
* elements, since we don't know which one is actually
* written:
*/
if (dst->flags & IR3_REG_RELATIV) {
for (unsigned i = 0; i < arr->length; i++) {
__ra_itr_push(ctx, arr->base + i);
}
} else {
__ra_itr_push(ctx, arr->base + dst->array.offset);
debug_assert(dst->array.offset < arr->length);
}
} else if (id->defn == instr) {
foreach_name_n (name, i, ctx, instr) {
/* tex instructions actually have a wrmask, and
* don't touch masked out components. We can't do
* anything useful about that in the first pass,
* but in the scalar pass we can realize these
* registers are available:
*/
if (ctx->scalar_pass && is_tex_or_prefetch(instr) &&
!(instr->regs[0]->wrmask & (1 << i)))
continue;
__ra_itr_push(ctx, name);
}
}
return __ra_itr_pop(ctx);
}
static inline unsigned
__ra_init_use_itr(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
{
/* nested use is not supported: */
assert(ctx->namecnt == ctx->nameidx);
ctx->namecnt = ctx->nameidx = 0;
struct ir3_register *reg;
foreach_src (reg, instr) {
if (reg->flags & IR3_REG_ARRAY) {
struct ir3_array *arr =
ir3_lookup_array(ctx->ir, reg->array.id);
/* indirect read is treated like a read from all array
* elements, since we don't know which one is actually
* read:
*/
if (reg->flags & IR3_REG_RELATIV) {
for (unsigned i = 0; i < arr->length; i++) {
__ra_itr_push(ctx, arr->base + i);
}
} else {
__ra_itr_push(ctx, arr->base + reg->array.offset);
debug_assert(reg->array.offset < arr->length);
}
} else {
foreach_name_n (name, i, ctx, reg->instr) {
/* split takes a src w/ wrmask potentially greater
* than 0x1, but it really only cares about a single
* component. This shows up in splits coming out of
* a tex instruction w/ wrmask=.z, for example.
*/
if (ctx->scalar_pass && (instr->opc == OPC_META_SPLIT) &&
!(i == instr->split.off))
continue;
__ra_itr_push(ctx, name);
}
}
}
return __ra_itr_pop(ctx);
}
#define foreach_def(__name, __ctx, __instr) \
for (unsigned __name = __ra_init_def_itr(__ctx, __instr); \
__name != NO_NAME; __name = __ra_itr_pop(__ctx))
#define foreach_use(__name, __ctx, __instr) \
for (unsigned __name = __ra_init_use_itr(__ctx, __instr); \
__name != NO_NAME; __name = __ra_itr_pop(__ctx))
int ra_size_to_class(unsigned sz, bool half, bool high);
#endif /* IR3_RA_H_ */