freedreno/ir3/ra: add def/use iterators
Decouple the messy logic of figuring out vreg names defined/used by an instruction from the logic of what to do about it by introducing iterators. There is still *some* array vs ssa special casing in ra_block_compute_live_ranges(), but less than before. And this will avoid introducing a second copy of the def/use logic in a following patch which uses the liveranges to calculate the maximum # of live values (which is the optimal target for max physical register window to round-robin within). Signed-off-by: Rob Clark <robdclark@chromium.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4272>
This commit is contained in:
+42
-132
@@ -585,159 +585,69 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
|
||||
}
|
||||
|
||||
foreach_instr (instr, &block->instr_list) {
|
||||
struct ir3_instruction *src;
|
||||
struct ir3_register *reg;
|
||||
|
||||
if (writes_gpr(instr)) {
|
||||
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
|
||||
struct ir3_register *dst = instr->regs[0];
|
||||
|
||||
if (dst->flags & IR3_REG_ARRAY) {
|
||||
struct ir3_array *arr =
|
||||
ir3_lookup_array(ctx->ir, dst->array.id);
|
||||
unsigned i;
|
||||
foreach_def (name, ctx, instr) {
|
||||
if (name_is_array(ctx, name)) {
|
||||
struct ir3_array *arr = name_to_array(ctx, name);
|
||||
|
||||
arr->start_ip = MIN2(arr->start_ip, instr->ip);
|
||||
arr->end_ip = MAX2(arr->end_ip, instr->ip);
|
||||
|
||||
/* set the node class now.. in case we don't encounter
|
||||
* this array dst again. From register_alloc algo's
|
||||
* perspective, these are all single/scalar regs:
|
||||
*/
|
||||
for (i = 0; i < arr->length; i++) {
|
||||
for (unsigned i = 0; i < arr->length; i++) {
|
||||
unsigned name = arr->base + i;
|
||||
if(arr->half)
|
||||
ra_set_node_class(ctx->g, name, ctx->set->half_classes[0]);
|
||||
else
|
||||
ra_set_node_class(ctx->g, name, ctx->set->classes[0]);
|
||||
}
|
||||
|
||||
/* indirect write is treated like a write to all array
|
||||
* elements, since we don't know which one is actually
|
||||
* written:
|
||||
*/
|
||||
if (dst->flags & IR3_REG_RELATIV) {
|
||||
for (i = 0; i < arr->length; i++) {
|
||||
unsigned name = arr->base + i;
|
||||
def(name, instr);
|
||||
}
|
||||
} else {
|
||||
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
|
||||
if (is_high(instr)) {
|
||||
ra_set_node_class(ctx->g, name,
|
||||
ctx->set->high_classes[id->cls - HIGH_OFFSET]);
|
||||
} else if (is_half(instr)) {
|
||||
ra_set_node_class(ctx->g, name,
|
||||
ctx->set->half_classes[id->cls - HALF_OFFSET]);
|
||||
} else {
|
||||
unsigned name = arr->base + dst->array.offset;
|
||||
def(name, instr);
|
||||
}
|
||||
} else if (id->defn == instr) {
|
||||
/* in scalar pass, we aren't considering virtual register
|
||||
* classes, ie. if an instruction writes a vec2, then it
|
||||
* defines two different scalar register names.
|
||||
*/
|
||||
unsigned n = ctx->scalar_pass ? dest_regs(instr) : 1;
|
||||
for (unsigned i = 0; i < n; i++) {
|
||||
unsigned name = scalar_name(ctx, instr, i);
|
||||
|
||||
/* split/collect instructions have duplicate names
|
||||
* as real instructions, so they skip the hashtable:
|
||||
*/
|
||||
if (ctx->name_to_instr && !((instr->opc == OPC_META_SPLIT) ||
|
||||
(instr->opc == OPC_META_COLLECT))) {
|
||||
/* this is slightly annoying, we can't just use an
|
||||
* integer on the stack
|
||||
*/
|
||||
unsigned *key = ralloc(ctx->name_to_instr, unsigned);
|
||||
*key = name;
|
||||
debug_assert(!_mesa_hash_table_search(ctx->name_to_instr, key));
|
||||
_mesa_hash_table_insert(ctx->name_to_instr, key, instr);
|
||||
}
|
||||
|
||||
/* tex instructions actually have a wrmask, and
|
||||
* don't touch masked out components. We can't do
|
||||
* anything useful about that in the first pass,
|
||||
* but in the scalar pass we can realize these
|
||||
* registers are available:
|
||||
*/
|
||||
if (ctx->scalar_pass && is_tex_or_prefetch(instr) &&
|
||||
!(instr->regs[0]->wrmask & (1 << i)))
|
||||
continue;
|
||||
|
||||
def(name, instr);
|
||||
|
||||
if ((instr->opc == OPC_META_INPUT) && first_non_input)
|
||||
use(name, first_non_input);
|
||||
|
||||
if (is_high(instr)) {
|
||||
ra_set_node_class(ctx->g, name,
|
||||
ctx->set->high_classes[id->cls - HIGH_OFFSET]);
|
||||
} else if (is_half(instr)) {
|
||||
ra_set_node_class(ctx->g, name,
|
||||
ctx->set->half_classes[id->cls - HALF_OFFSET]);
|
||||
} else {
|
||||
ra_set_node_class(ctx->g, name,
|
||||
ctx->set->classes[id->cls]);
|
||||
}
|
||||
ra_set_node_class(ctx->g, name,
|
||||
ctx->set->classes[id->cls]);
|
||||
}
|
||||
}
|
||||
|
||||
def(name, instr);
|
||||
|
||||
if ((instr->opc == OPC_META_INPUT) && first_non_input)
|
||||
use(name, first_non_input);
|
||||
}
|
||||
|
||||
foreach_src (reg, instr) {
|
||||
if (reg->flags & IR3_REG_ARRAY) {
|
||||
struct ir3_array *arr =
|
||||
ir3_lookup_array(ctx->ir, reg->array.id);
|
||||
foreach_use (name, ctx, instr) {
|
||||
if (name_is_array(ctx, name)) {
|
||||
struct ir3_array *arr = name_to_array(ctx, name);
|
||||
|
||||
arr->start_ip = MIN2(arr->start_ip, instr->ip);
|
||||
arr->end_ip = MAX2(arr->end_ip, instr->ip);
|
||||
|
||||
/* indirect read is treated like a read from all array
|
||||
* elements, since we don't know which one is actually
|
||||
* read:
|
||||
/* NOTE: arrays are not SSA so unconditionally
|
||||
* set use bit:
|
||||
*/
|
||||
if (reg->flags & IR3_REG_RELATIV) {
|
||||
unsigned i;
|
||||
for (i = 0; i < arr->length; i++) {
|
||||
unsigned name = arr->base + i;
|
||||
use(name, instr);
|
||||
BITSET_SET(bd->use, name);
|
||||
}
|
||||
} else {
|
||||
unsigned name = arr->base + reg->array.offset;
|
||||
use(name, instr);
|
||||
/* NOTE: arrays are not SSA so unconditionally
|
||||
* set use bit:
|
||||
*/
|
||||
BITSET_SET(bd->use, name);
|
||||
debug_assert(reg->array.offset < arr->length);
|
||||
}
|
||||
} else if (ctx->scalar_pass) {
|
||||
struct ir3_instruction *src = reg->instr;
|
||||
/* skip things that aren't SSA: */
|
||||
unsigned n = src ? dest_regs(src) : 0;
|
||||
BITSET_SET(bd->use, name);
|
||||
}
|
||||
|
||||
/* in scalar pass, we aren't considering virtual register
|
||||
* classes, ie. if an instruction writes a vec2, then it
|
||||
* defines two different scalar register names.
|
||||
*
|
||||
* We need to traverse up thru collect/split to find the
|
||||
* actual non-meta instruction names for each of the
|
||||
* components:
|
||||
use(name, instr);
|
||||
}
|
||||
|
||||
foreach_name (name, ctx, instr) {
|
||||
/* split/collect instructions have duplicate names
|
||||
* as real instructions, so they skip the hashtable:
|
||||
*/
|
||||
if (ctx->name_to_instr && !((instr->opc == OPC_META_SPLIT) ||
|
||||
(instr->opc == OPC_META_COLLECT))) {
|
||||
/* this is slightly annoying, we can't just use an
|
||||
* integer on the stack
|
||||
*/
|
||||
for (unsigned i = 0; i < n; i++) {
|
||||
/* Need to filter out a couple special cases, ie.
|
||||
* writes to a0.x or p0.x:
|
||||
*/
|
||||
if (!writes_gpr(src))
|
||||
continue;
|
||||
|
||||
/* split takes a src w/ wrmask potentially greater
|
||||
* than 0x1, but it really only cares about a single
|
||||
* component. This shows up in splits coming out of
|
||||
* a tex instruction w/ wrmask=.z, for example.
|
||||
*/
|
||||
if ((instr->opc == OPC_META_SPLIT) &&
|
||||
!(i == instr->split.off))
|
||||
continue;
|
||||
|
||||
use(scalar_name(ctx, src, i), instr);
|
||||
}
|
||||
} else if ((src = ssa(reg)) && writes_gpr(src)) {
|
||||
unsigned name = ra_name(ctx, &ctx->instrd[src->ip]);
|
||||
use(name, instr);
|
||||
unsigned *key = ralloc(ctx->name_to_instr, unsigned);
|
||||
*key = name;
|
||||
debug_assert(!_mesa_hash_table_search(ctx->name_to_instr, key));
|
||||
_mesa_hash_table_insert(ctx->name_to_instr, key, instr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -134,6 +134,18 @@ struct ir3_ra_ctx {
|
||||
/* Tracking for select_reg callback */
|
||||
unsigned start_search_reg;
|
||||
unsigned max_target;
|
||||
|
||||
/* Temporary buffer for def/use iterators
|
||||
*
|
||||
* The worst case should probably be an array w/ relative access (ie.
|
||||
* all elements are def'd or use'd), and that can't be larger than
|
||||
* the number of registers.
|
||||
*
|
||||
* NOTE we could declare this on the stack if needed, but I don't
|
||||
* think there is a need for nested iterators.
|
||||
*/
|
||||
unsigned namebuf[NUM_REGS];
|
||||
unsigned namecnt, nameidx;
|
||||
};
|
||||
|
||||
static inline int
|
||||
@@ -182,6 +194,153 @@ writes_gpr(struct ir3_instruction *instr)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Sentinel returned by the name iterators once there are no more names.
 * Unsigned (and parenthesized) so that it has the same type as the
 * 'unsigned' vreg names it is returned as and compared against, rather
 * than relying on an implicit int -> unsigned conversion of ~0.
 */
#define NO_NAME (~0u)
|
||||
|
||||
/*
|
||||
* Iterators over the vreg names of an instruction's defs and uses
|
||||
*/
|
||||
|
||||
static inline unsigned
|
||||
__ra_name_cnt(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
|
||||
{
|
||||
if (!instr)
|
||||
return 0;
|
||||
|
||||
/* Filter special cases, ie. writes to a0.x or p0.x, or non-ssa: */
|
||||
if (!writes_gpr(instr) || (instr->regs[0]->flags & IR3_REG_ARRAY))
|
||||
return 0;
|
||||
|
||||
/* in scalar pass, we aren't considering virtual register classes, ie.
|
||||
* if an instruction writes a vec2, then it defines two different scalar
|
||||
* register names.
|
||||
*/
|
||||
if (ctx->scalar_pass)
|
||||
return dest_regs(instr);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Iterate the names defined by __instr, as counted by __ra_name_cnt().
 * On each iteration __n is the index of the name and __name is the
 * result of scalar_name(__ctx, __instr, __n).  Both are declared by the
 * macro and scoped to the loop.  Note that __instr is evaluated more
 * than once.
 */
#define foreach_name_n(__name, __n, __ctx, __instr) \
	for (unsigned __cnt = __ra_name_cnt(__ctx, __instr), __n = 0, __name; \
	     (__n < __cnt) && \
	        ((__name = scalar_name(__ctx, __instr, __n)), 1); \
	     __n++)

/* Same as foreach_name_n(), for callers that do not need the index: */
#define foreach_name(__name, __ctx, __instr) \
	foreach_name_n(__name, __n, __ctx, __instr)
|
||||
|
||||
static inline unsigned
|
||||
__ra_itr_pop(struct ir3_ra_ctx *ctx)
|
||||
{
|
||||
if (ctx->nameidx < ctx->namecnt)
|
||||
return ctx->namebuf[ctx->nameidx++];
|
||||
return NO_NAME;
|
||||
}
|
||||
|
||||
static inline void
|
||||
__ra_itr_push(struct ir3_ra_ctx *ctx, unsigned name)
|
||||
{
|
||||
assert(ctx->namecnt < ARRAY_SIZE(ctx->namebuf));
|
||||
ctx->namebuf[ctx->namecnt++] = name;
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
__ra_init_def_itr(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
|
||||
{
|
||||
/* nested use is not supported: */
|
||||
assert(ctx->namecnt == ctx->nameidx);
|
||||
|
||||
ctx->namecnt = ctx->nameidx = 0;
|
||||
|
||||
if (!writes_gpr(instr))
|
||||
return NO_NAME;
|
||||
|
||||
struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
|
||||
struct ir3_register *dst = instr->regs[0];
|
||||
|
||||
if (dst->flags & IR3_REG_ARRAY) {
|
||||
struct ir3_array *arr = ir3_lookup_array(ctx->ir, dst->array.id);
|
||||
|
||||
/* indirect write is treated like a write to all array
|
||||
* elements, since we don't know which one is actually
|
||||
* written:
|
||||
*/
|
||||
if (dst->flags & IR3_REG_RELATIV) {
|
||||
for (unsigned i = 0; i < arr->length; i++) {
|
||||
__ra_itr_push(ctx, arr->base + i);
|
||||
}
|
||||
} else {
|
||||
__ra_itr_push(ctx, arr->base + dst->array.offset);
|
||||
debug_assert(dst->array.offset < arr->length);
|
||||
}
|
||||
} else if (id->defn == instr) {
|
||||
foreach_name_n (name, i, ctx, instr) {
|
||||
/* tex instructions actually have a wrmask, and
|
||||
* don't touch masked out components. We can't do
|
||||
* anything useful about that in the first pass,
|
||||
* but in the scalar pass we can realize these
|
||||
* registers are available:
|
||||
*/
|
||||
if (ctx->scalar_pass && is_tex_or_prefetch(instr) &&
|
||||
!(instr->regs[0]->wrmask & (1 << i)))
|
||||
continue;
|
||||
__ra_itr_push(ctx, name);
|
||||
}
|
||||
}
|
||||
|
||||
return __ra_itr_pop(ctx);
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
__ra_init_use_itr(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
|
||||
{
|
||||
/* nested use is not supported: */
|
||||
assert(ctx->namecnt == ctx->nameidx);
|
||||
|
||||
ctx->namecnt = ctx->nameidx = 0;
|
||||
|
||||
struct ir3_register *reg;
|
||||
foreach_src (reg, instr) {
|
||||
if (reg->flags & IR3_REG_ARRAY) {
|
||||
struct ir3_array *arr =
|
||||
ir3_lookup_array(ctx->ir, reg->array.id);
|
||||
|
||||
/* indirect read is treated like a read from all array
|
||||
* elements, since we don't know which one is actually
|
||||
* read:
|
||||
*/
|
||||
if (reg->flags & IR3_REG_RELATIV) {
|
||||
for (unsigned i = 0; i < arr->length; i++) {
|
||||
__ra_itr_push(ctx, arr->base + i);
|
||||
}
|
||||
} else {
|
||||
__ra_itr_push(ctx, arr->base + reg->array.offset);
|
||||
debug_assert(reg->array.offset < arr->length);
|
||||
}
|
||||
} else {
|
||||
foreach_name_n (name, i, ctx, reg->instr) {
|
||||
/* split takes a src w/ wrmask potentially greater
|
||||
* than 0x1, but it really only cares about a single
|
||||
* component. This shows up in splits coming out of
|
||||
* a tex instruction w/ wrmask=.z, for example.
|
||||
*/
|
||||
if (ctx->scalar_pass && (instr->opc == OPC_META_SPLIT) &&
|
||||
!(i == instr->split.off))
|
||||
continue;
|
||||
__ra_itr_push(ctx, name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return __ra_itr_pop(ctx);
|
||||
}
|
||||
|
||||
/*
 * Iterate the vreg names defined by __instr.  __name is declared by the
 * macro as an 'unsigned' scoped to the loop.
 *
 * The names are staged in the buffer held by __ctx, so these loops can
 * not be nested.  Breaking out early leaves unconsumed names behind,
 * which trips the assert the next time an iterator is initialized.
 */
#define foreach_def(__name, __ctx, __instr) \
	for (unsigned __name = __ra_init_def_itr((__ctx), (__instr)); \
	     __name != NO_NAME; \
	     __name = __ra_itr_pop((__ctx)))
|
||||
|
||||
/*
 * Iterate the vreg names used by __instr.  __name is declared by the
 * macro as an 'unsigned' scoped to the loop.
 *
 * The names are staged in the buffer held by __ctx, so these loops can
 * not be nested.  Breaking out early leaves unconsumed names behind,
 * which trips the assert the next time an iterator is initialized.
 */
#define foreach_use(__name, __ctx, __instr) \
	for (unsigned __name = __ra_init_use_itr((__ctx), (__instr)); \
	     __name != NO_NAME; \
	     __name = __ra_itr_pop((__ctx)))
|
||||
|
||||
int ra_size_to_class(unsigned sz, bool half, bool high);
|
||||
|
||||
#endif /* IR3_RA_H_ */
|
||||
|
||||
Reference in New Issue
Block a user