From 2b0536e921df79789ee65da004dfaeaa5ac99155 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Mon, 10 Feb 2025 17:32:01 +0100 Subject: [PATCH] aco: remove block_kind_continue_or_break workaround and tests Part-of: --- src/amd/compiler/aco_insert_exec_mask.cpp | 17 - .../compiler/aco_instruction_selection.cpp | 54 +-- src/amd/compiler/aco_ir.h | 17 +- src/amd/compiler/aco_opt_value_numbering.cpp | 2 - src/amd/compiler/aco_print_ir.cpp | 2 - src/amd/compiler/aco_repair_ssa.cpp | 3 +- src/amd/compiler/tests/test_isel.cpp | 379 ------------------ 7 files changed, 23 insertions(+), 451 deletions(-) diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp index e3e32ce5666..3119581e702 100644 --- a/src/amd/compiler/aco_insert_exec_mask.cpp +++ b/src/amd/compiler/aco_insert_exec_mask.cpp @@ -643,23 +643,6 @@ add_branch_code(exec_ctx& ctx, Block* block) Pseudo_branch_instruction& branch = block->instructions.back()->branch(); branch.target[0] = block->linear_succs[0]; - } else if (block->kind & block_kind_continue_or_break) { - assert(ctx.program->blocks[ctx.program->blocks[block->linear_succs[1]].linear_succs[0]].kind & - block_kind_loop_header); - assert(ctx.program->blocks[ctx.program->blocks[block->linear_succs[0]].linear_succs[0]].kind & - block_kind_loop_exit); - assert(block->instructions.back()->opcode == aco_opcode::p_branch); - block->instructions.pop_back(); - - while (!(ctx.info[idx].exec.back().type & mask_type_loop)) - ctx.info[idx].exec.pop_back(); - - Temp cond = bld.sop2(Builder::s_or, bld.def(bld.lm), bld.def(s1, scc), - ctx.info[idx].exec.back().op, Operand::zero(bld.lm.bytes())) - .def(1) - .getTemp(); - bld.branch(aco_opcode::p_cbranch_nz, Operand(cond, scc), block->linear_succs[1], - block->linear_succs[0]); } else if (block->kind & block_kind_uniform) { Pseudo_branch_instruction& branch = block->instructions.back()->branch(); if (branch.opcode == aco_opcode::p_branch) { diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index c9aa65b5558..3a5520f9bc4 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -9898,52 +9898,24 @@ update_exec_info(isel_context* ctx) void end_loop(isel_context* ctx, loop_context* lc) { - // TODO: what if a loop ends with a unconditional or uniformly branched continue - // and this branch is never taken? + /* No need to check exec.potentially_empty_break/continue originating inside the loop. In the + * only case where it's possible at this point (divergent break after divergent continue), we + * should continue anyway. Terminate instructions cannot appear inside loops and demote inside + * divergent control flow requires WQM. + */ + assert(!ctx->cf_info.exec.potentially_empty_discard); + + /* Add the trivial continue. */ if (!ctx->cf_info.has_branch) { unsigned loop_header_idx = ctx->cf_info.parent_loop.header_idx; Builder bld(ctx->program, ctx->block); append_logical_end(ctx->block); - /* No need to check exec.potentially_empty_break/continue originating inside the loop. In the - * only case where it's possible at this point (divergent break after divergent continue), we - * should continue anyway. */ - if (ctx->cf_info.exec.potentially_empty_discard) { - /* Discards can result in code running with an empty exec mask. - * This would result in divergent breaks not ever being taken. As a - * workaround, break the loop when the loop mask is empty instead of - * always continuing. */ - ctx->block->kind |= (block_kind_continue_or_break | block_kind_uniform); - unsigned block_idx = ctx->block->index; - - /* create helper blocks to avoid critical edges */ - Block* break_block = ctx->program->create_and_insert_block(); - break_block->kind = block_kind_uniform; - bld.reset(break_block); - bld.branch(aco_opcode::p_branch); - add_linear_edge(block_idx, break_block); - add_linear_edge(break_block->index, &lc->loop_exit); - - Block* continue_block = ctx->program->create_and_insert_block(); - continue_block->kind = block_kind_uniform; - bld.reset(continue_block); - bld.branch(aco_opcode::p_branch); - add_linear_edge(block_idx, continue_block); - add_linear_edge(continue_block->index, &ctx->program->blocks[loop_header_idx]); - - if (!ctx->cf_info.has_divergent_branch) - add_logical_edge(block_idx, &ctx->program->blocks[loop_header_idx]); - ctx->block = &ctx->program->blocks[block_idx]; - - /* SGPR temporaries might need loop exit phis to be created. */ - ctx->program->should_repair_ssa = true; - } else { - ctx->block->kind |= (block_kind_continue | block_kind_uniform); - if (!ctx->cf_info.has_divergent_branch) - add_edge(ctx->block->index, &ctx->program->blocks[loop_header_idx]); - else - add_linear_edge(ctx->block->index, &ctx->program->blocks[loop_header_idx]); - } + ctx->block->kind |= (block_kind_continue | block_kind_uniform); + if (!ctx->cf_info.has_divergent_branch) + add_edge(ctx->block->index, &ctx->program->blocks[loop_header_idx]); + else + add_linear_edge(ctx->block->index, &ctx->program->blocks[loop_header_idx]); bld.reset(ctx->block); bld.branch(aco_opcode::p_branch); diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index dcd76f76845..bd6fd877ae7 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1955,15 +1955,14 @@ enum block_kind { block_kind_loop_exit = 1 << 4, block_kind_continue = 1 << 5, block_kind_break = 1 << 6, - block_kind_continue_or_break = 1 << 7, - block_kind_branch = 1 << 8, - block_kind_merge = 1 << 9, - block_kind_invert = 1 << 10, - block_kind_discard_early_exit = 1 << 11, - block_kind_uses_discard = 1 << 12, - block_kind_resume = 1 << 13, - block_kind_export_end = 1 << 14, - block_kind_end_with_regs = 1 << 15, + block_kind_branch = 1 << 7, + block_kind_merge = 1 << 8, + block_kind_invert = 1 << 9, + block_kind_discard_early_exit = 1 << 10, + block_kind_uses_discard = 1 << 11, + block_kind_resume = 1 << 12, + block_kind_export_end = 1 << 13, + block_kind_end_with_regs = 1 << 14, }; /* CFG */ diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp index 4ce7e027e64..5fa61e72bb4 100644 --- a/src/amd/compiler/aco_opt_value_numbering.cpp +++ b/src/amd/compiler/aco_opt_value_numbering.cpp @@ -461,8 +461,6 @@ value_numbering(Program* program) if (block.kind & block_kind_branch || block.kind & block_kind_loop_preheader || block.kind & block_kind_break || block.kind & block_kind_continue) ctx.exec_id++; - else if (block.kind & block_kind_continue_or_break) - ctx.exec_id += 2; } /* rename loop header phi operands */ diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp index f5eb46b03c5..b26455f5f53 100644 --- a/src/amd/compiler/aco_print_ir.cpp +++ b/src/amd/compiler/aco_print_ir.cpp @@ -840,8 +840,6 @@ print_block_kind(uint16_t kind, FILE* output) fprintf(output, "continue, "); if (kind & block_kind_break) fprintf(output, "break, "); - if (kind & block_kind_continue_or_break) - fprintf(output, "continue_or_break, "); if (kind & block_kind_branch) fprintf(output, "branch, "); if (kind & block_kind_merge) diff --git a/src/amd/compiler/aco_repair_ssa.cpp b/src/amd/compiler/aco_repair_ssa.cpp index 5039cb6067c..8c69559c85c 100644 --- a/src/amd/compiler/aco_repair_ssa.cpp +++ b/src/amd/compiler/aco_repair_ssa.cpp @@ -89,7 +89,8 @@ create_phis(repair_state* state, Temp tmp, uint32_t use_block, uint32_t def_bloc /* If a logical dominator has a temporary, we don't need to create a phi and can just use * that temporary instead. For linear temporaries, we also need to check if it dominates in * the linear CFG, because logical dominators do not necessarily dominate a block in the - * linear CFG (for example, because of continue_or_break or empty exec skips). */ + * linear CFG (for example, because of empty exec skips). + */ uint32_t dom = block.index; bool skip_phis = false; do { diff --git a/src/amd/compiler/tests/test_isel.cpp b/src/amd/compiler/tests/test_isel.cpp index d53b9f9d60a..b6ad3803e43 100644 --- a/src/amd/compiler/tests/test_isel.cpp +++ b/src/amd/compiler/tests/test_isel.cpp @@ -854,179 +854,6 @@ BEGIN_TEST(isel.cf.hidden_continue) finish_isel_test(); END_TEST -/** - * if (divergent) { - * a = ... - * loop { - * if (uniform) { - * break - * } - * terminate_if - * } - * b = phi a - * } - */ -BEGIN_TEST(isel.cf.hidden_break) - if (!setup_nir_cs(GFX11)) - return; - - nir_push_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 0)); - { - //>> s1: %brk = p_unit_test 1 - nir_def* brk = nir_unit_test_uniform_amd(nb, 1, 32, .base = 1); - nir_block* block; - nir_push_loop(nb); - { - nir_push_if(nb, nir_unit_test_uniform_amd(nb, 1, 1, .base = 2)); - { - block = nir_cursor_current_block(nb->cursor); - nir_jump(nb, nir_jump_break); - } - nir_pop_if(nb, NULL); - //>> BB5 - //>> /* logical preds: BB4, / linear preds: BB4, / kind: uniform, continue_or_break, discard, */ - - nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 3)); - } - nir_pop_loop(nb, NULL); - - //>> BB8 - //! /* logical preds: BB3, / linear preds: BB3, BB6, / kind: uniform, loop-exit, */ - //! s1: %_ = p_linear_phi %brk, s1: undef - nir_phi_instr* phi = nir_phi_instr_create(nb->shader); - nir_def_init(&phi->instr, &phi->def, 1, 32); - nir_phi_instr_add_src(phi, block, brk); - nir_builder_instr_insert(nb, &phi->instr); - nir_unit_test_amd(nb, &phi->def); - } - nir_pop_if(nb, NULL); - nir_unit_test_amd(nb, nir_undef(nb, 1, 32), .base = 4); - - finish_isel_test(); -END_TEST - -/** - * loop { - * if (divergent) { - * a = loop_invariant_sgpr - * break - * } - * terminate_if - * } - * use(a) - */ -BEGIN_TEST(isel.cf.hidden_break_no_lcssa) - if (!setup_nir_cs(GFX11)) - return; - - nir_def* val; - nir_push_loop(nb); - { - //>> BB1 - //! /* logical preds: BB0, BB9, / linear preds: BB0, BB11, / kind: loop-header, branch, */ - //! s1: %val_header_phi = p_linear_phi s1: undef, %val_invert_phi - - nir_push_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 1)); - { - //>> BB2 - //! /* logical preds: BB1, / linear preds: BB1, / kind: break, */ - //! p_logical_start - //! s1: %val = p_parallelcopy 0 - val = nir_imm_zero(nb, 1, 32); - nir_jump(nb, nir_jump_break); - } - nir_pop_if(nb, NULL); - - //>> BB6 - //! /* logical preds: / linear preds: BB4, BB5, / kind: invert, */ - //! s1: %val_invert_phi = p_linear_phi %val, %val_header_phi - //>> BB9 - //! /* logical preds: BB7, / linear preds: BB7, BB8, / kind: uniform, continue_or_break, merge, discard, */ - nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 2)); - } - nir_pop_loop(nb, NULL); - - //>> BB12 - //! /* logical preds: BB2, / linear preds: BB3, BB10, / kind: uniform, top-level, loop-exit, */ - //! s1: %val_exit_phi = p_linear_phi %val, %val_invert_phi - //! p_logical_start - //! p_unit_test 0, %val_exit_phi - nir_unit_test_amd(nb, val, .base = 0); - - finish_isel_test(); -END_TEST - -/** - * loop { - * use(phi(, a)) - * loop { - * terminate_if - * if (divergent) { - * a = loop_invariant_sgpr - * break - * } - * } - * } - */ -BEGIN_TEST(isel.cf.hidden_break_no_lcssa_header_phi) - if (!setup_nir_cs(GFX11)) - return; - - //>> p_startpgm - //! p_logical_start - //! s1: %init = p_unit_test 0 - nir_def* init = nir_unit_test_uniform_amd(nb, 1, 32, .base = 0); - - nir_def* val; - nir_phi_instr* phi; - nir_loop *loop = nir_push_loop(nb); - { - //>> BB1 - //! /* logical preds: BB0, BB19, / linear preds: BB0, BB19, / kind: uniform, loop-preheader, loop-header, */ - //! s1: %phi = p_linear_phi %init, %val_lcssa - //! p_logical_start - //! p_unit_test 1, %phi - phi = nir_phi_instr_create(nb->shader); - nir_def_init(&phi->instr, &phi->def, 1, 32); - nir_phi_instr_add_src(phi, init->parent_instr->block, init); - nir_unit_test_amd(nb, &phi->def, .base = 1); - - //>> BB2 - //! /* logical preds: BB1, BB13, / linear preds: BB1, BB15, / kind: uniform, loop-header, discard, */ - nir_push_loop(nb); - { - nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 2)); - - nir_push_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 3)); - { - //>> BB4 - //! /* logical preds: BB3, / linear preds: BB3, / kind: break, */ - //! p_logical_start - //! s1: %val = p_parallelcopy 0 - val = nir_imm_zero(nb, 1, 32); - nir_jump(nb, nir_jump_break); - } - nir_pop_if(nb, NULL); - //>> BB13 - //! /* logical preds: BB11, / linear preds: BB11, BB12, / kind: uniform, continue_or_break, */ - } - nir_pop_loop(nb, NULL); - //>> BB16 - //! /* logical preds: BB4, / linear preds: BB5, BB14, / kind: uniform, loop-exit, */ - //! s1: %val_lcssa = p_linear_phi %val, %_ - //>> BB20 - //! /* logical preds: BB17, / linear preds: BB17, / kind: uniform, top-level, loop-exit, */ - - nir_phi_instr_add_src(phi, nir_cursor_current_block(nb->cursor), val); - } - nir_pop_loop(nb, NULL); - - nb->cursor = nir_after_phis(nir_loop_first_block(loop)); - nir_builder_instr_insert(nb, &phi->instr); - - finish_isel_test(); -END_TEST - /** * if (divergent) { * a = @@ -1330,46 +1157,6 @@ BEGIN_TEST(isel.cf.empty_exec.divergent_if) finish_isel_test(); END_TEST -/* - * loop { - * terminate_if - * //potentially empty - * } - */ -BEGIN_TEST(isel.cf.empty_exec.loop_terminate) - if (!setup_nir_cs(GFX11)) - return; - - nir_push_loop(nb); - { - nir_break_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 0)); - - //>> BB9 - //>> p_unit_test 1, %_ - //>> s2: %_ = p_unit_test 2 - //>> p_discard_if %_ - //>> p_cbranch_z %0:exec rarely_taken - nir_unit_test_amd(nb, nir_undef(nb, 1, 32), .base = 1); - nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 2)); - - //>> BB10 - //>> p_unit_test 3, %_ - nir_unit_test_amd(nb, nir_undef(nb, 1, 32), .base = 3); - - //>> BB12 - //! /* logical preds: BB10, / linear preds: BB10, BB11, / kind: uniform, continue_or_break, */ - } - nir_pop_loop(nb, NULL); - - //>> BB15 - //! /* logical preds: BB2, / linear preds: BB3, BB13, / kind: uniform, top-level, loop-exit, */ - //! p_logical_start - //! p_unit_test 4, %_ - nir_unit_test_amd(nb, nir_undef(nb, 1, 32), .base = 4); - - finish_isel_test(); -END_TEST - /* * loop { * if (divergent) { @@ -1682,169 +1469,3 @@ BEGIN_TEST(isel.cf.empty_exec.terminate_then_loop) finish_isel_test(); END_TEST - -/* - * if (divergent) { - * terminate_if - * //potentially empty - * loop { - * terminate_if - * //potentially empty - * loop { - * terminate_if - * //potentially empty - * val = ... - * if (uniform) break - * break - * } - * if (uniform) break - * } - * } - * use(val) - */ -BEGIN_TEST(isel.cf.empty_exec.repair_ssa) - if (!setup_nir_cs(GFX11)) - return; - - nir_def* val_sgpr; - nir_def* val_vgpr; - nir_push_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 0)); - { - //>> BB1 - //! /* logical preds: BB0, / linear preds: BB0, / kind: uniform, discard, */ - //>> s2: %_ = p_unit_test 1 - //>> p_cbranch_z %0:exec rarely_taken - nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 1)); - - nir_push_loop(nb); - { - //>> BB3 - //! /* logical preds: BB2, BB20, / linear preds: BB2, BB22, / kind: uniform, loop-header, discard, */ - //>> s2: %_ = p_unit_test 2 - //>> p_cbranch_z %0:exec rarely_taken - nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 2)); - - nir_push_loop(nb); - { - //>> BB5 - //! /* logical preds: BB4, / linear preds: BB4, / kind: uniform, loop-header, discard, */ - //>> s2: %_ = p_unit_test 3 - //>> p_cbranch_z %0:exec rarely_taken - nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 3)); - - //>> BB6 - //! /* logical preds: BB5, / linear preds: BB5, / kind: uniform, */ - //>> s1: %sgpr0 = p_parallelcopy 42 - //>> v1: %vgpr0 = v_mbcnt_hi_u32_b32_e64 %_, %_ - val_sgpr = nir_imm_int(nb, 42); - val_vgpr = nir_mbcnt_amd(nb, nir_imm_intN_t(nb, UINT64_MAX, 64), nir_imm_int(nb, 0)); - - //>> BB7 - //! /* logical preds: BB6, / linear preds: BB6, / kind: uniform, break, */ - nir_break_if(nb, nir_unit_test_uniform_amd(nb, 1, 1, .base = 4)); - - //>> BB10 - //! /* logical preds: / linear preds: BB5, / kind: uniform, */ - //>> BB11 - //! /* logical preds: BB9, / linear preds: BB9, BB10, / kind: uniform, break, */ - //! s1: %sgpr1 = p_linear_phi %sgpr0, s1: undef - nir_jump(nb, nir_jump_break); - } - nir_pop_loop(nb, NULL); - //>> BB12 - //! /* logical preds: BB7, BB11, / linear preds: BB7, BB11, / kind: uniform, loop-exit, */ - //! s1: %sgpr2 = p_linear_phi %sgpr0, %sgpr1 - //>> BB13 - //! /* logical preds: / linear preds: BB3, / kind: uniform, */ - //>> BB14 - //! /* logical preds: BB12, / linear preds: BB12, BB13, / kind: uniform, */ - //! s1: %sgpr3 = p_linear_phi %sgpr2, s1: undef - //>> p_cbranch_z %0:exec rarely_taken - - //>> BB15 - //! /* logical preds: BB14, / linear preds: BB14, / kind: uniform, */ - //>> p_unit_test 5, %sgpr3 - //>> p_unit_test 6, %vgpr3 - //>> p_unit_test 7, %sgpr3 - //>> p_unit_test 8, %vgpr3 - nir_unit_test_amd(nb, val_sgpr, .base = 5); - nir_unit_test_amd(nb, val_vgpr, .base = 6); - nir_unit_test_amd(nb, val_sgpr, .base = 7); - nir_unit_test_amd(nb, val_vgpr, .base = 8); - - //>> BB16 - //! /* logical preds: BB15, / linear preds: BB15, / kind: uniform, break, */ - nir_break_if(nb, nir_unit_test_uniform_amd(nb, 1, 1, .base = 9)); - //>> BB20 - //! /* logical preds: BB18, / linear preds: BB18, BB19, / kind: uniform, continue_or_break, */ - } - nir_pop_loop(nb, NULL); - //>> BB23 - //! /* logical preds: BB16, / linear preds: BB16, BB21, / kind: uniform, loop-exit, */ - //>> BB24 - //! /* logical preds: / linear preds: BB1, / kind: uniform, */ - //>> BB25 - //! /* logical preds: BB23, / linear preds: BB23, BB24, / kind: uniform, */ - //! s1: %sgpr4 = p_linear_phi %sgpr3, s1: undef - } - nir_pop_if(nb, NULL); - //>> BB27 - //! /* logical preds: / linear preds: BB25, BB26, / kind: invert, */ - //! s1: %sgpr5 = p_linear_phi %sgpr4, s1: undef - //>> BB30 - //! /* logical preds: BB25, BB28, / linear preds: BB28, BB29, / kind: uniform, top-level, merge, */ - //! v1: %vgpr = p_phi %vgpr0, v1: undef - //! s1: %sgpr = p_linear_phi %sgpr5, %sgpr5 - val_sgpr = nir_if_phi(nb, val_sgpr, nir_undef(nb, 1, 32)); - val_vgpr = nir_if_phi(nb, val_vgpr, nir_undef(nb, 1, 32)); - - //>> p_unit_test 10, %sgpr - //>> p_unit_test 11, %vgpr - nir_unit_test_amd(nb, val_sgpr, .base = 10); - nir_unit_test_amd(nb, val_vgpr, .base = 11); - - finish_isel_test(); -END_TEST - -/* - * if (divergent) { - * loop { - * if (uniform) { - * terminate_if - * // exec potentially empty - * continue - * } - * // exec potentially empty - * break - * } - * } - */ -BEGIN_TEST(isel.cf.empty_exec.loop_uniform_continue) - if (!setup_nir_cs(GFX11)) - return; - - nir_push_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 0)); - { - nir_push_loop(nb); - { - //>> BB2 - //! /* logical preds: BB1, BB3, / linear preds: BB1, BB4, / kind: uniform, loop-header, */ - nir_push_if(nb, nir_unit_test_uniform_amd(nb, 1, 1, .base = 1)); - { - //>> BB3 - //! /* logical preds: BB2, / linear preds: BB2, / kind: continue, discard, */ - nir_terminate_if(nb, nir_unit_test_uniform_amd(nb, 1, 1, .base = 2)); - nir_jump(nb, nir_jump_continue); - } - nir_pop_if(nb, NULL); - - //>> BB7 - //! /* logical preds: BB6, / linear preds: BB5, BB6, / kind: uniform, break, */ - nir_jump(nb, nir_jump_break); - } - nir_pop_loop(nb, NULL); - } - nir_pop_if(nb, NULL); - - finish_isel_test(); -END_TEST