aco: remove block_kind_continue_or_break workaround and tests

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33479>
This commit is contained in:
Daniel Schürmann
2025-02-10 17:32:01 +01:00
committed by Marge Bot
parent fa4eb37bf6
commit 2b0536e921
7 changed files with 23 additions and 451 deletions
-17
View File
@@ -643,23 +643,6 @@ add_branch_code(exec_ctx& ctx, Block* block)
Pseudo_branch_instruction& branch = block->instructions.back()->branch();
branch.target[0] = block->linear_succs[0];
} else if (block->kind & block_kind_continue_or_break) {
assert(ctx.program->blocks[ctx.program->blocks[block->linear_succs[1]].linear_succs[0]].kind &
block_kind_loop_header);
assert(ctx.program->blocks[ctx.program->blocks[block->linear_succs[0]].linear_succs[0]].kind &
block_kind_loop_exit);
assert(block->instructions.back()->opcode == aco_opcode::p_branch);
block->instructions.pop_back();
while (!(ctx.info[idx].exec.back().type & mask_type_loop))
ctx.info[idx].exec.pop_back();
Temp cond = bld.sop2(Builder::s_or, bld.def(bld.lm), bld.def(s1, scc),
ctx.info[idx].exec.back().op, Operand::zero(bld.lm.bytes()))
.def(1)
.getTemp();
bld.branch(aco_opcode::p_cbranch_nz, Operand(cond, scc), block->linear_succs[1],
block->linear_succs[0]);
} else if (block->kind & block_kind_uniform) {
Pseudo_branch_instruction& branch = block->instructions.back()->branch();
if (branch.opcode == aco_opcode::p_branch) {
+13 -41
View File
@@ -9898,52 +9898,24 @@ update_exec_info(isel_context* ctx)
void
end_loop(isel_context* ctx, loop_context* lc)
{
// TODO: what if a loop ends with a unconditional or uniformly branched continue
// and this branch is never taken?
/* No need to check exec.potentially_empty_break/continue originating inside the loop. In the
* only case where it's possible at this point (divergent break after divergent continue), we
* should continue anyway. Terminate instructions cannot appear inside loops and demote inside
* divergent control flow requires WQM.
*/
assert(!ctx->cf_info.exec.potentially_empty_discard);
/* Add the trivial continue. */
if (!ctx->cf_info.has_branch) {
unsigned loop_header_idx = ctx->cf_info.parent_loop.header_idx;
Builder bld(ctx->program, ctx->block);
append_logical_end(ctx->block);
/* No need to check exec.potentially_empty_break/continue originating inside the loop. In the
* only case where it's possible at this point (divergent break after divergent continue), we
* should continue anyway. */
if (ctx->cf_info.exec.potentially_empty_discard) {
/* Discards can result in code running with an empty exec mask.
* This would result in divergent breaks not ever being taken. As a
* workaround, break the loop when the loop mask is empty instead of
* always continuing. */
ctx->block->kind |= (block_kind_continue_or_break | block_kind_uniform);
unsigned block_idx = ctx->block->index;
/* create helper blocks to avoid critical edges */
Block* break_block = ctx->program->create_and_insert_block();
break_block->kind = block_kind_uniform;
bld.reset(break_block);
bld.branch(aco_opcode::p_branch);
add_linear_edge(block_idx, break_block);
add_linear_edge(break_block->index, &lc->loop_exit);
Block* continue_block = ctx->program->create_and_insert_block();
continue_block->kind = block_kind_uniform;
bld.reset(continue_block);
bld.branch(aco_opcode::p_branch);
add_linear_edge(block_idx, continue_block);
add_linear_edge(continue_block->index, &ctx->program->blocks[loop_header_idx]);
if (!ctx->cf_info.has_divergent_branch)
add_logical_edge(block_idx, &ctx->program->blocks[loop_header_idx]);
ctx->block = &ctx->program->blocks[block_idx];
/* SGPR temporaries might need loop exit phis to be created. */
ctx->program->should_repair_ssa = true;
} else {
ctx->block->kind |= (block_kind_continue | block_kind_uniform);
if (!ctx->cf_info.has_divergent_branch)
add_edge(ctx->block->index, &ctx->program->blocks[loop_header_idx]);
else
add_linear_edge(ctx->block->index, &ctx->program->blocks[loop_header_idx]);
}
ctx->block->kind |= (block_kind_continue | block_kind_uniform);
if (!ctx->cf_info.has_divergent_branch)
add_edge(ctx->block->index, &ctx->program->blocks[loop_header_idx]);
else
add_linear_edge(ctx->block->index, &ctx->program->blocks[loop_header_idx]);
bld.reset(ctx->block);
bld.branch(aco_opcode::p_branch);
+8 -9
View File
@@ -1955,15 +1955,14 @@ enum block_kind {
block_kind_loop_exit = 1 << 4,
block_kind_continue = 1 << 5,
block_kind_break = 1 << 6,
block_kind_continue_or_break = 1 << 7,
block_kind_branch = 1 << 8,
block_kind_merge = 1 << 9,
block_kind_invert = 1 << 10,
block_kind_discard_early_exit = 1 << 11,
block_kind_uses_discard = 1 << 12,
block_kind_resume = 1 << 13,
block_kind_export_end = 1 << 14,
block_kind_end_with_regs = 1 << 15,
block_kind_branch = 1 << 7,
block_kind_merge = 1 << 8,
block_kind_invert = 1 << 9,
block_kind_discard_early_exit = 1 << 10,
block_kind_uses_discard = 1 << 11,
block_kind_resume = 1 << 12,
block_kind_export_end = 1 << 13,
block_kind_end_with_regs = 1 << 14,
};
/* CFG */
@@ -461,8 +461,6 @@ value_numbering(Program* program)
if (block.kind & block_kind_branch || block.kind & block_kind_loop_preheader ||
block.kind & block_kind_break || block.kind & block_kind_continue)
ctx.exec_id++;
else if (block.kind & block_kind_continue_or_break)
ctx.exec_id += 2;
}
/* rename loop header phi operands */
-2
View File
@@ -840,8 +840,6 @@ print_block_kind(uint16_t kind, FILE* output)
fprintf(output, "continue, ");
if (kind & block_kind_break)
fprintf(output, "break, ");
if (kind & block_kind_continue_or_break)
fprintf(output, "continue_or_break, ");
if (kind & block_kind_branch)
fprintf(output, "branch, ");
if (kind & block_kind_merge)
+2 -1
View File
@@ -89,7 +89,8 @@ create_phis(repair_state* state, Temp tmp, uint32_t use_block, uint32_t def_bloc
/* If a logical dominator has a temporary, we don't need to create a phi and can just use
* that temporary instead. For linear temporaries, we also need to check if it dominates in
* the linear CFG, because logical dominators do not necessarily dominate a block in the
* linear CFG (for example, because of continue_or_break or empty exec skips). */
* linear CFG (for example, because of empty exec skips).
*/
uint32_t dom = block.index;
bool skip_phis = false;
do {
-379
View File
@@ -854,179 +854,6 @@ BEGIN_TEST(isel.cf.hidden_continue)
finish_isel_test();
END_TEST
/**
* if (divergent) {
* a = ...
* loop {
* if (uniform) {
* break
* }
* terminate_if
* }
* b = phi a
* }
*/
BEGIN_TEST(isel.cf.hidden_break)
if (!setup_nir_cs(GFX11))
return;
nir_push_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 0));
{
//>> s1: %brk = p_unit_test 1
nir_def* brk = nir_unit_test_uniform_amd(nb, 1, 32, .base = 1);
nir_block* block;
nir_push_loop(nb);
{
nir_push_if(nb, nir_unit_test_uniform_amd(nb, 1, 1, .base = 2));
{
block = nir_cursor_current_block(nb->cursor);
nir_jump(nb, nir_jump_break);
}
nir_pop_if(nb, NULL);
//>> BB5
//>> /* logical preds: BB4, / linear preds: BB4, / kind: uniform, continue_or_break, discard, */
nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 3));
}
nir_pop_loop(nb, NULL);
//>> BB8
//! /* logical preds: BB3, / linear preds: BB3, BB6, / kind: uniform, loop-exit, */
//! s1: %_ = p_linear_phi %brk, s1: undef
nir_phi_instr* phi = nir_phi_instr_create(nb->shader);
nir_def_init(&phi->instr, &phi->def, 1, 32);
nir_phi_instr_add_src(phi, block, brk);
nir_builder_instr_insert(nb, &phi->instr);
nir_unit_test_amd(nb, &phi->def);
}
nir_pop_if(nb, NULL);
nir_unit_test_amd(nb, nir_undef(nb, 1, 32), .base = 4);
finish_isel_test();
END_TEST
/**
* loop {
* if (divergent) {
* a = loop_invariant_sgpr
* break
* }
* terminate_if
* }
* use(a)
*/
BEGIN_TEST(isel.cf.hidden_break_no_lcssa)
if (!setup_nir_cs(GFX11))
return;
nir_def* val;
nir_push_loop(nb);
{
//>> BB1
//! /* logical preds: BB0, BB9, / linear preds: BB0, BB11, / kind: loop-header, branch, */
//! s1: %val_header_phi = p_linear_phi s1: undef, %val_invert_phi
nir_push_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 1));
{
//>> BB2
//! /* logical preds: BB1, / linear preds: BB1, / kind: break, */
//! p_logical_start
//! s1: %val = p_parallelcopy 0
val = nir_imm_zero(nb, 1, 32);
nir_jump(nb, nir_jump_break);
}
nir_pop_if(nb, NULL);
//>> BB6
//! /* logical preds: / linear preds: BB4, BB5, / kind: invert, */
//! s1: %val_invert_phi = p_linear_phi %val, %val_header_phi
//>> BB9
//! /* logical preds: BB7, / linear preds: BB7, BB8, / kind: uniform, continue_or_break, merge, discard, */
nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 2));
}
nir_pop_loop(nb, NULL);
//>> BB12
//! /* logical preds: BB2, / linear preds: BB3, BB10, / kind: uniform, top-level, loop-exit, */
//! s1: %val_exit_phi = p_linear_phi %val, %val_invert_phi
//! p_logical_start
//! p_unit_test 0, %val_exit_phi
nir_unit_test_amd(nb, val, .base = 0);
finish_isel_test();
END_TEST
/**
* loop {
* use(phi(, a))
* loop {
* terminate_if
* if (divergent) {
* a = loop_invariant_sgpr
* break
* }
* }
* }
*/
BEGIN_TEST(isel.cf.hidden_break_no_lcssa_header_phi)
if (!setup_nir_cs(GFX11))
return;
//>> p_startpgm
//! p_logical_start
//! s1: %init = p_unit_test 0
nir_def* init = nir_unit_test_uniform_amd(nb, 1, 32, .base = 0);
nir_def* val;
nir_phi_instr* phi;
nir_loop *loop = nir_push_loop(nb);
{
//>> BB1
//! /* logical preds: BB0, BB19, / linear preds: BB0, BB19, / kind: uniform, loop-preheader, loop-header, */
//! s1: %phi = p_linear_phi %init, %val_lcssa
//! p_logical_start
//! p_unit_test 1, %phi
phi = nir_phi_instr_create(nb->shader);
nir_def_init(&phi->instr, &phi->def, 1, 32);
nir_phi_instr_add_src(phi, init->parent_instr->block, init);
nir_unit_test_amd(nb, &phi->def, .base = 1);
//>> BB2
//! /* logical preds: BB1, BB13, / linear preds: BB1, BB15, / kind: uniform, loop-header, discard, */
nir_push_loop(nb);
{
nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 2));
nir_push_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 3));
{
//>> BB4
//! /* logical preds: BB3, / linear preds: BB3, / kind: break, */
//! p_logical_start
//! s1: %val = p_parallelcopy 0
val = nir_imm_zero(nb, 1, 32);
nir_jump(nb, nir_jump_break);
}
nir_pop_if(nb, NULL);
//>> BB13
//! /* logical preds: BB11, / linear preds: BB11, BB12, / kind: uniform, continue_or_break, */
}
nir_pop_loop(nb, NULL);
//>> BB16
//! /* logical preds: BB4, / linear preds: BB5, BB14, / kind: uniform, loop-exit, */
//! s1: %val_lcssa = p_linear_phi %val, %_
//>> BB20
//! /* logical preds: BB17, / linear preds: BB17, / kind: uniform, top-level, loop-exit, */
nir_phi_instr_add_src(phi, nir_cursor_current_block(nb->cursor), val);
}
nir_pop_loop(nb, NULL);
nb->cursor = nir_after_phis(nir_loop_first_block(loop));
nir_builder_instr_insert(nb, &phi->instr);
finish_isel_test();
END_TEST
/**
* if (divergent) {
* a =
@@ -1330,46 +1157,6 @@ BEGIN_TEST(isel.cf.empty_exec.divergent_if)
finish_isel_test();
END_TEST
/*
* loop {
* terminate_if
* //potentially empty
* }
*/
BEGIN_TEST(isel.cf.empty_exec.loop_terminate)
if (!setup_nir_cs(GFX11))
return;
nir_push_loop(nb);
{
nir_break_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 0));
//>> BB9
//>> p_unit_test 1, %_
//>> s2: %_ = p_unit_test 2
//>> p_discard_if %_
//>> p_cbranch_z %0:exec rarely_taken
nir_unit_test_amd(nb, nir_undef(nb, 1, 32), .base = 1);
nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 2));
//>> BB10
//>> p_unit_test 3, %_
nir_unit_test_amd(nb, nir_undef(nb, 1, 32), .base = 3);
//>> BB12
//! /* logical preds: BB10, / linear preds: BB10, BB11, / kind: uniform, continue_or_break, */
}
nir_pop_loop(nb, NULL);
//>> BB15
//! /* logical preds: BB2, / linear preds: BB3, BB13, / kind: uniform, top-level, loop-exit, */
//! p_logical_start
//! p_unit_test 4, %_
nir_unit_test_amd(nb, nir_undef(nb, 1, 32), .base = 4);
finish_isel_test();
END_TEST
/*
* loop {
* if (divergent) {
@@ -1682,169 +1469,3 @@ BEGIN_TEST(isel.cf.empty_exec.terminate_then_loop)
finish_isel_test();
END_TEST
/*
* if (divergent) {
* terminate_if
* //potentially empty
* loop {
* terminate_if
* //potentially empty
* loop {
* terminate_if
* //potentially empty
* val = ...
* if (uniform) break
* break
* }
* if (uniform) break
* }
* }
* use(val)
*/
BEGIN_TEST(isel.cf.empty_exec.repair_ssa)
if (!setup_nir_cs(GFX11))
return;
nir_def* val_sgpr;
nir_def* val_vgpr;
nir_push_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 0));
{
//>> BB1
//! /* logical preds: BB0, / linear preds: BB0, / kind: uniform, discard, */
//>> s2: %_ = p_unit_test 1
//>> p_cbranch_z %0:exec rarely_taken
nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 1));
nir_push_loop(nb);
{
//>> BB3
//! /* logical preds: BB2, BB20, / linear preds: BB2, BB22, / kind: uniform, loop-header, discard, */
//>> s2: %_ = p_unit_test 2
//>> p_cbranch_z %0:exec rarely_taken
nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 2));
nir_push_loop(nb);
{
//>> BB5
//! /* logical preds: BB4, / linear preds: BB4, / kind: uniform, loop-header, discard, */
//>> s2: %_ = p_unit_test 3
//>> p_cbranch_z %0:exec rarely_taken
nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 3));
//>> BB6
//! /* logical preds: BB5, / linear preds: BB5, / kind: uniform, */
//>> s1: %sgpr0 = p_parallelcopy 42
//>> v1: %vgpr0 = v_mbcnt_hi_u32_b32_e64 %_, %_
val_sgpr = nir_imm_int(nb, 42);
val_vgpr = nir_mbcnt_amd(nb, nir_imm_intN_t(nb, UINT64_MAX, 64), nir_imm_int(nb, 0));
//>> BB7
//! /* logical preds: BB6, / linear preds: BB6, / kind: uniform, break, */
nir_break_if(nb, nir_unit_test_uniform_amd(nb, 1, 1, .base = 4));
//>> BB10
//! /* logical preds: / linear preds: BB5, / kind: uniform, */
//>> BB11
//! /* logical preds: BB9, / linear preds: BB9, BB10, / kind: uniform, break, */
//! s1: %sgpr1 = p_linear_phi %sgpr0, s1: undef
nir_jump(nb, nir_jump_break);
}
nir_pop_loop(nb, NULL);
//>> BB12
//! /* logical preds: BB7, BB11, / linear preds: BB7, BB11, / kind: uniform, loop-exit, */
//! s1: %sgpr2 = p_linear_phi %sgpr0, %sgpr1
//>> BB13
//! /* logical preds: / linear preds: BB3, / kind: uniform, */
//>> BB14
//! /* logical preds: BB12, / linear preds: BB12, BB13, / kind: uniform, */
//! s1: %sgpr3 = p_linear_phi %sgpr2, s1: undef
//>> p_cbranch_z %0:exec rarely_taken
//>> BB15
//! /* logical preds: BB14, / linear preds: BB14, / kind: uniform, */
//>> p_unit_test 5, %sgpr3
//>> p_unit_test 6, %vgpr3
//>> p_unit_test 7, %sgpr3
//>> p_unit_test 8, %vgpr3
nir_unit_test_amd(nb, val_sgpr, .base = 5);
nir_unit_test_amd(nb, val_vgpr, .base = 6);
nir_unit_test_amd(nb, val_sgpr, .base = 7);
nir_unit_test_amd(nb, val_vgpr, .base = 8);
//>> BB16
//! /* logical preds: BB15, / linear preds: BB15, / kind: uniform, break, */
nir_break_if(nb, nir_unit_test_uniform_amd(nb, 1, 1, .base = 9));
//>> BB20
//! /* logical preds: BB18, / linear preds: BB18, BB19, / kind: uniform, continue_or_break, */
}
nir_pop_loop(nb, NULL);
//>> BB23
//! /* logical preds: BB16, / linear preds: BB16, BB21, / kind: uniform, loop-exit, */
//>> BB24
//! /* logical preds: / linear preds: BB1, / kind: uniform, */
//>> BB25
//! /* logical preds: BB23, / linear preds: BB23, BB24, / kind: uniform, */
//! s1: %sgpr4 = p_linear_phi %sgpr3, s1: undef
}
nir_pop_if(nb, NULL);
//>> BB27
//! /* logical preds: / linear preds: BB25, BB26, / kind: invert, */
//! s1: %sgpr5 = p_linear_phi %sgpr4, s1: undef
//>> BB30
//! /* logical preds: BB25, BB28, / linear preds: BB28, BB29, / kind: uniform, top-level, merge, */
//! v1: %vgpr = p_phi %vgpr0, v1: undef
//! s1: %sgpr = p_linear_phi %sgpr5, %sgpr5
val_sgpr = nir_if_phi(nb, val_sgpr, nir_undef(nb, 1, 32));
val_vgpr = nir_if_phi(nb, val_vgpr, nir_undef(nb, 1, 32));
//>> p_unit_test 10, %sgpr
//>> p_unit_test 11, %vgpr
nir_unit_test_amd(nb, val_sgpr, .base = 10);
nir_unit_test_amd(nb, val_vgpr, .base = 11);
finish_isel_test();
END_TEST
/*
* if (divergent) {
* loop {
* if (uniform) {
* terminate_if
* // exec potentially empty
* continue
* }
* // exec potentially empty
* break
* }
* }
*/
BEGIN_TEST(isel.cf.empty_exec.loop_uniform_continue)
if (!setup_nir_cs(GFX11))
return;
nir_push_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 0));
{
nir_push_loop(nb);
{
//>> BB2
//! /* logical preds: BB1, BB3, / linear preds: BB1, BB4, / kind: uniform, loop-header, */
nir_push_if(nb, nir_unit_test_uniform_amd(nb, 1, 1, .base = 1));
{
//>> BB3
//! /* logical preds: BB2, / linear preds: BB2, / kind: continue, discard, */
nir_terminate_if(nb, nir_unit_test_uniform_amd(nb, 1, 1, .base = 2));
nir_jump(nb, nir_jump_continue);
}
nir_pop_if(nb, NULL);
//>> BB7
//! /* logical preds: BB6, / linear preds: BB5, BB6, / kind: uniform, break, */
nir_jump(nb, nir_jump_break);
}
nir_pop_loop(nb, NULL);
}
nir_pop_if(nb, NULL);
finish_isel_test();
END_TEST