diff --git a/src/amd/compiler/aco_builder_h.py b/src/amd/compiler/aco_builder_h.py index 84d27119ee9..8925ffb07b9 100644 --- a/src/amd/compiler/aco_builder_h.py +++ b/src/amd/compiler/aco_builder_h.py @@ -564,7 +564,7 @@ formats = [("pseudo", [Format.PSEUDO], list(itertools.product(range(5), range(6) ("mtbuf", [Format.MTBUF], [(0, 4), (1, 3)]), ("mimg", [Format.MIMG], itertools.product([0, 1], [3, 4, 5, 6, 7])), ("exp", [Format.EXP], [(0, 4), (0, 5)]), - ("branch", [Format.PSEUDO_BRANCH], itertools.product([1], [0, 1])), + ("branch", [Format.PSEUDO_BRANCH], [(0, 0), (0, 1)]), ("barrier", [Format.PSEUDO_BARRIER], [(0, 0)]), ("reduction", [Format.PSEUDO_REDUCTION], [(3, 3)]), ("vop1", [Format.VOP1], [(0, 0), (1, 1), (1, 2), (2, 2)]), diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp index 15eb02ea9e9..ef2ddd3d9b0 100644 --- a/src/amd/compiler/aco_insert_exec_mask.cpp +++ b/src/amd/compiler/aco_insert_exec_mask.cpp @@ -637,8 +637,8 @@ add_branch_code(exec_ctx& ctx, Block* block) if (need_parallelcopy) bld.copy(Definition(exec, bld.lm), ctx.info[idx].exec.back().op); - bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2), Operand(exec, bld.lm), - block->linear_succs[1], block->linear_succs[0]); + bld.branch(aco_opcode::p_cbranch_nz, Operand(exec, bld.lm), block->linear_succs[1], + block->linear_succs[0]); } else if (block->kind & block_kind_uniform) { Pseudo_branch_instruction& branch = block->instructions.back()->branch(); if (branch.opcode == aco_opcode::p_branch) { @@ -671,7 +671,7 @@ add_branch_code(exec_ctx& ctx, Block* block) /* add next current exec to the stack */ ctx.info[idx].exec.emplace_back(Operand(exec, bld.lm), mask_type); - Builder::Result r = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2), Operand(exec, bld.lm), + Builder::Result r = bld.branch(aco_opcode::p_cbranch_z, Operand(exec, bld.lm), block->linear_succs[1], block->linear_succs[0]); r->branch().rarely_taken = branch->branch().rarely_taken; r->branch().never_taken = branch->branch().never_taken; @@ -685,7 +685,7 @@ add_branch_code(exec_ctx& ctx, Block* block) bld.sop2(Builder::s_andn2, Definition(exec, bld.lm), bld.def(s1, scc), orig_exec, Operand(exec, bld.lm)); - Builder::Result r = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2), Operand(exec, bld.lm), + Builder::Result r = bld.branch(aco_opcode::p_cbranch_z, Operand(exec, bld.lm), block->linear_succs[1], block->linear_succs[0]); r->branch().rarely_taken = branch->branch().rarely_taken; r->branch().never_taken = branch->branch().never_taken; @@ -713,7 +713,7 @@ add_branch_code(exec_ctx& ctx, Block* block) bld.copy(Definition(exec, bld.lm), Operand::zero(bld.lm.bytes())); } - bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2), bld.scc(cond), block->linear_succs[1], + bld.branch(aco_opcode::p_cbranch_nz, bld.scc(cond), block->linear_succs[1], block->linear_succs[0]); } else if (block->kind & block_kind_continue) { assert(block->instructions.back()->opcode == aco_opcode::p_branch); @@ -739,7 +739,7 @@ add_branch_code(exec_ctx& ctx, Block* block) bld.copy(Definition(exec, bld.lm), Operand::zero(bld.lm.bytes())); } - bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2), bld.scc(cond), block->linear_succs[1], + bld.branch(aco_opcode::p_cbranch_nz, bld.scc(cond), block->linear_succs[1], block->linear_succs[0]); } else { unreachable("unknown/invalid block type"); diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 65ce9814385..5934949d494 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -10125,7 +10125,7 @@ begin_loop(isel_context* ctx, loop_context* lc) append_logical_end(ctx->block); ctx->block->kind |= block_kind_loop_preheader | block_kind_uniform; Builder bld(ctx->program, ctx->block); - bld.branch(aco_opcode::p_branch, bld.def(s2)); + bld.branch(aco_opcode::p_branch); unsigned loop_preheader_idx = ctx->block->index; lc->loop_exit.kind |= (block_kind_loop_exit | (ctx->block->kind & block_kind_top_level)); @@ -10201,14 +10201,14 @@ end_loop(isel_context* ctx, loop_context* lc) Block* break_block = ctx->program->create_and_insert_block(); break_block->kind = block_kind_uniform; bld.reset(break_block); - bld.branch(aco_opcode::p_branch, bld.def(s2)); + bld.branch(aco_opcode::p_branch); add_linear_edge(block_idx, break_block); add_linear_edge(break_block->index, &lc->loop_exit); Block* continue_block = ctx->program->create_and_insert_block(); continue_block->kind = block_kind_uniform; bld.reset(continue_block); - bld.branch(aco_opcode::p_branch, bld.def(s2)); + bld.branch(aco_opcode::p_branch); add_linear_edge(block_idx, continue_block); add_linear_edge(continue_block->index, &ctx->program->blocks[loop_header_idx]); @@ -10227,7 +10227,7 @@ end_loop(isel_context* ctx, loop_context* lc) } bld.reset(ctx->block); - bld.branch(aco_opcode::p_branch, bld.def(s2)); + bld.branch(aco_opcode::p_branch); } ctx->cf_info.has_branch = false; @@ -10271,7 +10271,7 @@ emit_loop_jump(isel_context* ctx, bool is_break) /* uniform break - directly jump out of the loop */ ctx->block->kind |= block_kind_uniform; ctx->cf_info.has_branch = true; - bld.branch(aco_opcode::p_branch, bld.def(s2)); + bld.branch(aco_opcode::p_branch); add_linear_edge(idx, logical_target); return; } @@ -10290,7 +10290,7 @@ emit_loop_jump(isel_context* ctx, bool is_break) /* uniform continue - directly jump to the loop header */ ctx->block->kind |= block_kind_uniform; ctx->cf_info.has_branch = true; - bld.branch(aco_opcode::p_branch, bld.def(s2)); + bld.branch(aco_opcode::p_branch); add_linear_edge(idx, logical_target); return; } @@ -10310,7 +10310,7 @@ emit_loop_jump(isel_context* ctx, bool is_break) } /* remove critical edges from linear CFG */ - bld.branch(aco_opcode::p_branch, bld.def(s2)); + bld.branch(aco_opcode::p_branch); Block* break_block = ctx->program->create_and_insert_block(); break_block->kind |= block_kind_uniform; add_linear_edge(idx, break_block); @@ -10319,7 +10319,7 @@ emit_loop_jump(isel_context* ctx, bool is_break) logical_target = &ctx->program->blocks[ctx->cf_info.parent_loop.header_idx]; add_linear_edge(break_block->index, logical_target); bld.reset(break_block); - bld.branch(aco_opcode::p_branch, bld.def(s2)); + bld.branch(aco_opcode::p_branch); Block* continue_block = ctx->program->create_and_insert_block(); add_linear_edge(idx, continue_block); @@ -10436,8 +10436,7 @@ begin_divergent_if_then(isel_context* ctx, if_context* ic, Temp cond, /* branch to linear then block */ assert(cond.regClass() == ctx->program->lane_mask); aco_ptr branch; - branch.reset(create_instruction(aco_opcode::p_cbranch_z, Format::PSEUDO_BRANCH, 1, 1)); - branch->definitions[0] = Definition(ctx->program->allocateTmp(s2)); + branch.reset(create_instruction(aco_opcode::p_cbranch_z, Format::PSEUDO_BRANCH, 1, 0)); branch->operands[0] = Operand(cond); bool never_taken = sel_ctrl == nir_selection_control_divergent_always_taken && @@ -10479,8 +10478,7 @@ begin_divergent_if_else(isel_context* ctx, if_context* ic, append_logical_end(BB_then_logical); /* branch from logical then block to invert block */ aco_ptr branch; - branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); - branch->definitions[0] = Definition(ctx->program->allocateTmp(s2)); + branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0)); BB_then_logical->instructions.emplace_back(std::move(branch)); add_linear_edge(BB_then_logical->index, &ic->BB_invert); if (!ctx->cf_info.parent_loop.has_divergent_branch) @@ -10495,8 +10493,7 @@ begin_divergent_if_else(isel_context* ctx, if_context* ic, BB_then_linear->kind |= block_kind_uniform; add_linear_edge(ic->BB_if_idx, BB_then_linear); /* branch from linear then block to invert block */ - branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); - branch->definitions[0] = Definition(ctx->program->allocateTmp(s2)); + branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0)); BB_then_linear->instructions.emplace_back(std::move(branch)); add_linear_edge(BB_then_linear->index, &ic->BB_invert); @@ -10505,8 +10502,7 @@ begin_divergent_if_else(isel_context* ctx, if_context* ic, ic->invert_idx = ctx->block->index; /* branch to linear else block (skip else) */ - branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); - branch->definitions[0] = Definition(ctx->program->allocateTmp(s2)); + branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0)); bool never_taken = sel_ctrl == nir_selection_control_divergent_always_taken && !(ctx->cf_info.exec.potentially_empty_discard || ctx->cf_info.exec.potentially_empty_break || @@ -10539,8 +10535,7 @@ end_divergent_if(isel_context* ctx, if_context* ic) /* branch from logical else block to endif block */ aco_ptr branch; - branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); - branch->definitions[0] = Definition(ctx->program->allocateTmp(s2)); + branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0)); BB_else_logical->instructions.emplace_back(std::move(branch)); add_linear_edge(BB_else_logical->index, &ic->BB_endif); if (!ctx->cf_info.parent_loop.has_divergent_branch) @@ -10557,8 +10552,7 @@ end_divergent_if(isel_context* ctx, if_context* ic) add_linear_edge(ic->invert_idx, BB_else_linear); /* branch from linear else block to endif block */ - branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); - branch->definitions[0] = Definition(ctx->program->allocateTmp(s2)); + branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0)); BB_else_linear->instructions.emplace_back(std::move(branch)); add_linear_edge(BB_else_linear->index, &ic->BB_endif); @@ -10587,8 +10581,7 @@ begin_uniform_if_then(isel_context* ctx, if_context* ic, Temp cond) aco_ptr branch; aco_opcode branch_opcode = aco_opcode::p_cbranch_z; - branch.reset(create_instruction(branch_opcode, Format::PSEUDO_BRANCH, 1, 1)); - branch->definitions[0] = Definition(ctx->program->allocateTmp(s2)); + branch.reset(create_instruction(branch_opcode, Format::PSEUDO_BRANCH, 1, 0)); if (cond.id()) { branch->operands[0] = Operand(cond); branch->operands[0].setPrecolored(scc); @@ -10626,8 +10619,7 @@ begin_uniform_if_else(isel_context* ctx, if_context* ic, bool logical_else) append_logical_end(BB_then); /* branch from then block to endif block */ aco_ptr branch; - branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); - branch->definitions[0] = Definition(ctx->program->allocateTmp(s2)); + branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0)); BB_then->instructions.emplace_back(std::move(branch)); add_linear_edge(BB_then->index, &ic->BB_endif); if (!ctx->cf_info.parent_loop.has_divergent_branch) @@ -10665,8 +10657,7 @@ end_uniform_if(isel_context* ctx, if_context* ic, bool logical_else) append_logical_end(BB_else); /* branch from then block to endif block */ aco_ptr branch; - branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1)); - branch->definitions[0] = Definition(ctx->program->allocateTmp(s2)); + branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0)); BB_else->instructions.emplace_back(std::move(branch)); add_linear_edge(BB_else->index, &ic->BB_endif); if (logical_else && !ctx->cf_info.parent_loop.has_divergent_branch) diff --git a/src/amd/compiler/aco_jump_threading.cpp b/src/amd/compiler/aco_jump_threading.cpp index ec3610f2ddc..3cad5c61723 100644 --- a/src/amd/compiler/aco_jump_threading.cpp +++ b/src/amd/compiler/aco_jump_threading.cpp @@ -322,8 +322,6 @@ try_merge_break_with_continue(jump_threading_ctx& ctx, Block* block) merge->instructions.back()->branch().target[0] = merge->index; std::swap(merge->instructions.back(), block->instructions.back()); - std::swap(merge->instructions.back()->definitions[0], - block->instructions.back()->definitions[0]); block->linear_succs.clear(); block->linear_succs.push_back(merge->index); diff --git a/src/amd/compiler/aco_live_var_analysis.cpp b/src/amd/compiler/aco_live_var_analysis.cpp index 46f00dbfb51..20bfaf51b6f 100644 --- a/src/amd/compiler/aco_live_var_analysis.cpp +++ b/src/amd/compiler/aco_live_var_analysis.cpp @@ -352,7 +352,6 @@ process_live_temps_per_block(live_ctx& ctx, Block* block) } block->live_in_demand = new_demand; - block->live_in_demand.sgpr += 2; /* Add 2 SGPRs for potential long-jumps. */ block->register_demand.update(block->live_in_demand); ctx.program->max_reg_demand.update(block->register_demand); ctx.handled_once = std::min(ctx.handled_once, block->index); diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 3de1a813487..f3083fc86b4 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -3063,38 +3063,12 @@ register_allocation(Program* program, ra_test_policy policy) get_regs_for_phis(ctx, block, register_file, instructions, program->live.live_in[block.index]); - /* If this is a merge block, the state of the register file at the branch instruction of the - * predecessors corresponds to the state after phis at the merge block. So, we allocate a - * register for the predecessor's branch definitions as if there was a phi. - */ - if (!block.linear_preds.empty() && - (block.linear_preds.size() != 1 || - program->blocks[block.linear_preds[0]].linear_succs.size() == 1)) { - PhysReg br_reg = get_reg_phi(ctx, program->live.live_in[block.index], register_file, - instructions, block, ctx.phi_dummy, Temp(0, s2)); - for (unsigned pred : block.linear_preds) { - aco_ptr& br = program->blocks[pred].instructions.back(); - - assert(br->definitions.size() == 1 && br->definitions[0].regClass() == s2 && - br->definitions[0].isKill()); - - br->definitions[0].setFixed(br_reg); - } - } - /* Handle all other instructions of the block */ auto NonPhi = [](aco_ptr& instr) -> bool { return instr && !is_phi(instr); }; auto instr_it = std::find_if(block.instructions.begin(), block.instructions.end(), NonPhi); for (; instr_it != block.instructions.end(); ++instr_it) { aco_ptr& instr = *instr_it; std::vector> parallelcopy; - - if (instr->opcode == aco_opcode::p_branch) { - /* unconditional branches are handled after phis of the target */ - instructions.emplace_back(std::move(instr)); - break; - } - assert(!is_phi(instr)); /* handle operands */ diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp index 13f63386b3f..293d9736998 100644 --- a/src/amd/compiler/aco_validate.cpp +++ b/src/amd/compiler/aco_validate.cpp @@ -1515,8 +1515,7 @@ validate_ra(Program* program) } } - if (!instr->isBranch() || block.linear_succs.size() != 1) - err |= validate_instr_defs(program, regs, assignments, loc, instr); + err |= validate_instr_defs(program, regs, assignments, loc, instr); if (!is_phi(instr)) { for (const Operand& op : instr->operands) { @@ -1527,13 +1526,6 @@ validate_ra(Program* program) regs[op.physReg().reg_b + j] = 0; } } - } else if (block.linear_preds.size() != 1 || - program->blocks[block.linear_preds[0]].linear_succs.size() == 1) { - for (unsigned pred : block.linear_preds) { - aco_ptr& br = program->blocks[pred].instructions.back(); - assert(br->isBranch()); - err |= validate_instr_defs(program, regs, assignments, loc, br); - } } } } diff --git a/src/amd/compiler/tests/helpers.cpp b/src/amd/compiler/tests/helpers.cpp index c6627a544a7..fe546156a7c 100644 --- a/src/amd/compiler/tests/helpers.cpp +++ b/src/amd/compiler/tests/helpers.cpp @@ -604,32 +604,30 @@ emit_divergent_if_else(Program* prog, aco::Builder& b, Operand cond, std::functi b.reset(if_block); Temp saved_exec = b.sop1(Builder::s_and_saveexec, b.def(b.lm, saved_exec_reg), Definition(scc, s1), Definition(exec, b.lm), cond, Operand(exec, b.lm)); - b.branch(aco_opcode::p_cbranch_nz, Definition(vcc, bld.lm), then_logical->index, - then_linear->index); + b.branch(aco_opcode::p_cbranch_nz, then_logical->index, then_linear->index); b.reset(then_logical); b.pseudo(aco_opcode::p_logical_start); then(); b.pseudo(aco_opcode::p_logical_end); - b.branch(aco_opcode::p_branch, Definition(vcc, bld.lm), invert->index); + b.branch(aco_opcode::p_branch, invert->index); b.reset(then_linear); - b.branch(aco_opcode::p_branch, Definition(vcc, bld.lm), invert->index); + b.branch(aco_opcode::p_branch, invert->index); b.reset(invert); b.sop2(Builder::s_andn2, Definition(exec, bld.lm), Definition(scc, s1), Operand(saved_exec, saved_exec_reg), Operand(exec, bld.lm)); - b.branch(aco_opcode::p_cbranch_nz, Definition(vcc, bld.lm), else_logical->index, - else_linear->index); + b.branch(aco_opcode::p_cbranch_nz, else_logical->index, else_linear->index); b.reset(else_logical); b.pseudo(aco_opcode::p_logical_start); els(); b.pseudo(aco_opcode::p_logical_end); - b.branch(aco_opcode::p_branch, Definition(vcc, bld.lm), endif_block->index); + b.branch(aco_opcode::p_branch, endif_block->index); b.reset(else_linear); - b.branch(aco_opcode::p_branch, Definition(vcc, bld.lm), endif_block->index); + b.branch(aco_opcode::p_branch, endif_block->index); b.reset(endif_block); b.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm), diff --git a/src/amd/compiler/tests/test_isel.cpp b/src/amd/compiler/tests/test_isel.cpp index 58317bc0d1b..f8b8361e813 100644 --- a/src/amd/compiler/tests/test_isel.cpp +++ b/src/amd/compiler/tests/test_isel.cpp @@ -422,7 +422,7 @@ BEGIN_TEST(isel.cf.unreachable_break.uniform_continue) //! s2: %zero = p_parallelcopy 0 //! s2: %_, s1: %cond:scc = s_and_b64 %zero, %0:exec //! p_logical_end - //! s2: %_ = p_cbranch_z %cond:scc + //! p_cbranch_z %cond:scc //! BB5 //! /* logical preds: BB4, / linear preds: BB4, / kind: uniform, break, */ //>> BB6 @@ -504,7 +504,7 @@ BEGIN_TEST(isel.cf.unreachable_continue.mixed_break) //! p_logical_start //! s2: %cond = p_unit_test 5 //! p_logical_end - //! s2: %_ = p_cbranch_z %cond + //! p_cbranch_z %cond nir_push_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base=5)); { //>> BB5 @@ -604,7 +604,7 @@ BEGIN_TEST(isel.cf.unreachable_continue.nested_mixed_break) //! s2: %cond1 = p_unit_test 4 //! s2: %_, s1: %_:scc = s_and_b64 %cond1, %0:exec //! p_logical_end - //! s2: %_ = p_cbranch_z %_:scc + //! p_cbranch_z %_:scc nir_push_if(nb, nir_unit_test_uniform_amd(nb, 1, 1, .base=4)); { //>> BB5 @@ -621,7 +621,7 @@ BEGIN_TEST(isel.cf.unreachable_continue.nested_mixed_break) //! p_logical_start //! s2: %cond2 = p_unit_test 5 //! p_logical_end - //! s2: %_ = p_cbranch_z %cond2 + //! p_cbranch_z %cond2 nir_push_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base=5)); { //>> BB8 @@ -677,7 +677,7 @@ BEGIN_TEST(isel.cf.unreachable_loop_exit) //>> s1: %_ = p_unit_test 0 //>> s2: %zero = p_parallelcopy 0 //>> s2: %_, s1: %cond:scc = s_and_b64 %zero, %0:exec - //>> s2: %_ = p_cbranch_z %cond:scc + //>> p_cbranch_z %cond:scc //! BB2 //! /* logical preds: BB1, / linear preds: BB1, / kind: uniform, break, */ //>> BB4 @@ -760,7 +760,7 @@ BEGIN_TEST(isel.cf.uniform_if_branch_use) //>> s2: %cond = p_unit_test 2 //! s2: %_, s1: %_:scc = s_and_b64 %cond, %0:exec //! p_logical_end - //! s2: %_ = p_cbranch_z %_:scc + //! p_cbranch_z %_:scc nir_def *val; nir_push_if(nb, nir_unit_test_uniform_amd(nb, 1, 1, .base=2)); { @@ -773,7 +773,7 @@ BEGIN_TEST(isel.cf.uniform_if_branch_use) /* The contents of this branch is moved to the merge block. */ //>> BB14 //! /* logical preds: BB13, / linear preds: BB12, BB13, / kind: uniform, */ - //>> s2: %_ = p_cbranch_z %0:exec rarely_taken + //>> p_cbranch_z %0:exec rarely_taken //! BB15 //! /* logical preds: BB14, / linear preds: BB14, / kind: uniform, */ //! p_logical_start @@ -1165,7 +1165,7 @@ BEGIN_TEST(isel.cf.empty_exec.uniform_if) //>> BB0 //>> s2: %_ = p_unit_test 0 - //>> s2: %_ = p_cbranch_z %_:scc + //>> p_cbranch_z %_:scc nir_push_if(nb, nir_unit_test_uniform_amd(nb, 1, 1, .base = 0)); { //>> BB1 @@ -1205,26 +1205,26 @@ BEGIN_TEST(isel.cf.empty_exec.divergent_if) //>> BB0 //>> s2: %_ = p_unit_test 0 - //>> s2: %_ = p_cbranch_z %_ + //>> p_cbranch_z %_ nir_push_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 0)); { //>> BB1 //>> s2: %_ = p_unit_test 1 //>> p_discard_if %_ - //>> s2: %_ = p_cbranch_z %0:exec rarely_taken + //>> p_cbranch_z %0:exec rarely_taken nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 1)); //>> p_unit_test 2, %_ nir_unit_test_amd(nb, nir_undef(nb, 1, 32), .base = 2); //>> s2: %_ = p_unit_test 3 - //>> s2: %_ = p_cbranch_z %_ + //>> p_cbranch_z %_ nir_push_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 3)); { //>> p_unit_test 4, %_ //>> s2: %_ = p_unit_test 5 //>> p_discard_if %_ - //>> s2: %_ = p_cbranch_z %0:exec rarely_taken + //>> p_cbranch_z %0:exec rarely_taken nir_unit_test_amd(nb, nir_undef(nb, 1, 32), .base = 4); nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 5)); @@ -1236,7 +1236,7 @@ BEGIN_TEST(isel.cf.empty_exec.divergent_if) //>> p_unit_test 7, %_ //>> s2: %_ = p_unit_test 8 //>> p_discard_if %_ - //>> s2: %_ = p_cbranch_z %0:exec rarely_taken + //>> p_cbranch_z %0:exec rarely_taken nir_unit_test_amd(nb, nir_undef(nb, 1, 32), .base = 7); nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 8)); @@ -1250,7 +1250,7 @@ BEGIN_TEST(isel.cf.empty_exec.divergent_if) //>> /* logical preds: / linear preds: BB1, / kind: uniform, */ //>> BB16 //! /* logical preds: BB14, / linear preds: BB14, BB15, / kind: uniform, */ - //>> s2: %35 = p_cbranch_z %0:exec rarely_taken + //>> p_cbranch_z %0:exec rarely_taken //>> p_unit_test 10, %_ nir_unit_test_amd(nb, nir_undef(nb, 1, 32), .base = 10); @@ -1284,7 +1284,7 @@ BEGIN_TEST(isel.cf.empty_exec.loop_terminate) //>> p_unit_test 0, %_ //>> s2: %_ = p_unit_test 1 //>> p_discard_if %_ - //>> s2: %_ = p_cbranch_z %0:exec rarely_taken + //>> p_cbranch_z %0:exec rarely_taken nir_unit_test_amd(nb, nir_undef(nb, 1, 32), .base = 0); nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 1)); @@ -1337,7 +1337,7 @@ BEGIN_TEST(isel.cf.empty_exec.loop_break) //>> BB10 //! /* logical preds: BB8, / linear preds: BB8, BB9, / kind: uniform, merge, */ - //>> s2: %_ = p_cbranch_z %0:exec rarely_taken + //>> p_cbranch_z %0:exec rarely_taken //>> BB11 //>> p_unit_test 3, %_ nir_unit_test_amd(nb, nir_undef(nb, 1, 32), .base = 3); @@ -1397,7 +1397,7 @@ BEGIN_TEST(isel.cf.empty_exec.loop_continue) //>> BB13 //! /* logical preds: BB11, / linear preds: BB11, BB12, / kind: uniform, merge, */ - //>> s2: %23 = p_cbranch_z %0:exec rarely_taken + //>> p_cbranch_z %0:exec rarely_taken //>> BB14 //>> p_unit_test 3, %_ nir_unit_test_amd(nb, nir_undef(nb, 1, 32), .base = 3); @@ -1463,7 +1463,7 @@ BEGIN_TEST(isel.cf.empty_exec.loop_continue_then_break) nir_break_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 3)); //>> BB17 //! /* logical preds: BB15, / linear preds: BB15, BB16, / kind: uniform, merge, */ - //>> s2: %_ = p_cbranch_z %0:exec rarely_taken + //>> p_cbranch_z %0:exec rarely_taken //>> BB18 //! /* logical preds: BB17, / linear preds: BB17, / kind: uniform, */ @@ -1499,7 +1499,7 @@ BEGIN_TEST(isel.cf.empty_exec.terminate_then_uniform_if) //>> BB0 //>> s2: %_ = p_unit_test 0 - //>> s2: %_ = p_cbranch_z %_ + //>> p_cbranch_z %_ nir_push_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 0)); { //>> BB1 @@ -1508,9 +1508,9 @@ BEGIN_TEST(isel.cf.empty_exec.terminate_then_uniform_if) //>> p_discard_if %_ nir_def* cond = nir_unit_test_uniform_amd(nb, 1, 1, .base = 1); nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 2)); - //>> s2: %_ = p_cbranch_z %0:exec rarely_taken + //>> p_cbranch_z %0:exec rarely_taken - //>> s2: %_ = p_cbranch_z %_:scc + //>> p_cbranch_z %_:scc nir_push_if(nb, cond); { //>> p_unit_test 3, %2 @@ -1543,7 +1543,7 @@ BEGIN_TEST(isel.cf.empty_exec.terminate_then_divergent_if) //>> BB0 //>> s2: %_ = p_unit_test 0 - //>> s2: %_ = p_cbranch_z %_ + //>> p_cbranch_z %_ nir_push_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 0)); { //>> BB1 @@ -1552,9 +1552,9 @@ BEGIN_TEST(isel.cf.empty_exec.terminate_then_divergent_if) //>> p_discard_if %_ nir_def* cond = nir_unit_test_divergent_amd(nb, 1, 1, .base = 1); nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 2)); - //>> s2: %_ = p_cbranch_z %0:exec rarely_taken + //>> p_cbranch_z %0:exec rarely_taken - //>> s2: %_ = p_cbranch_z %_ + //>> p_cbranch_z %_ nir_push_if(nb, cond); { //>> BB3 @@ -1591,14 +1591,14 @@ BEGIN_TEST(isel.cf.empty_exec.terminate_then_loop) //>> BB0 //>> s2: %_ = p_unit_test 0 - //>> s2: %_ = p_cbranch_z %_ + //>> p_cbranch_z %_ nir_push_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 0)); { //>> BB1 //>> s2: %_ = p_unit_test 1 //>> p_discard_if %_ nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 1)); - //>> s2: %_ = p_cbranch_z %0:exec rarely_taken + //>> p_cbranch_z %0:exec rarely_taken //>> BB2 //! /* logical preds: BB1, / linear preds: BB1, / kind: uniform, loop-preheader, */ @@ -1649,7 +1649,7 @@ BEGIN_TEST(isel.cf.empty_exec.repair_ssa) //>> BB1 //! /* logical preds: BB0, / linear preds: BB0, / kind: uniform, discard, */ //>> s2: %_ = p_unit_test 1 - //>> s2: %_ = p_cbranch_z %0:exec rarely_taken + //>> p_cbranch_z %0:exec rarely_taken nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 1)); nir_push_loop(nb); @@ -1657,7 +1657,7 @@ BEGIN_TEST(isel.cf.empty_exec.repair_ssa) //>> BB3 //! /* logical preds: BB2, BB20, / linear preds: BB2, BB22, / kind: uniform, loop-header, discard, */ //>> s2: %_ = p_unit_test 2 - //>> s2: %_ = p_cbranch_z %0:exec rarely_taken + //>> p_cbranch_z %0:exec rarely_taken nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 2)); nir_push_loop(nb); @@ -1665,7 +1665,7 @@ BEGIN_TEST(isel.cf.empty_exec.repair_ssa) //>> BB5 //! /* logical preds: BB4, / linear preds: BB4, / kind: uniform, loop-header, discard, */ //>> s2: %_ = p_unit_test 3 - //>> s2: %_ = p_cbranch_z %0:exec rarely_taken + //>> p_cbranch_z %0:exec rarely_taken nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 3)); //>> BB6 @@ -1695,7 +1695,7 @@ BEGIN_TEST(isel.cf.empty_exec.repair_ssa) //>> BB14 //! /* logical preds: BB12, / linear preds: BB12, BB13, / kind: uniform, */ //! s1: %sgpr3 = p_linear_phi %sgpr2, s1: undef - //>> s2: %_ = p_cbranch_z %0:exec rarely_taken + //>> p_cbranch_z %0:exec rarely_taken //>> BB15 //! /* logical preds: BB14, / linear preds: BB14, / kind: uniform, */ diff --git a/src/amd/compiler/tests/test_optimizer_postRA.cpp b/src/amd/compiler/tests/test_optimizer_postRA.cpp index 51e33e33d21..9e6d78f7bbd 100644 --- a/src/amd/compiler/tests/test_optimizer_postRA.cpp +++ b/src/amd/compiler/tests/test_optimizer_postRA.cpp @@ -11,7 +11,6 @@ using namespace aco; BEGIN_TEST(optimizer_postRA.vcmp) PhysReg reg_v0(256); PhysReg reg_s0(0); - PhysReg reg_s2(2); PhysReg reg_s4(4); //>> v1: %a:v[0] = p_startpgm @@ -28,18 +27,17 @@ BEGIN_TEST(optimizer_postRA.vcmp) /* Recognize when the result of VOPC goes to VCC, and use that for the branching then. */ //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0] - //! s2: %e:s[2-3] = p_cbranch_z %b:vcc - //! p_unit_test 0, %e:s[2-3] + //! p_cbranch_z %b:vcc + //! p_unit_test 0 auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(), Operand(v_in, reg_v0)); auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm)); - auto br = - bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp())); - writeout(0, Operand(br, reg_s2)); + bld.branch(aco_opcode::p_cbranch_z, bld.scc(sand.def(1).getTemp())); + writeout(0); } - //; del b, e + //; del b { /* When VCC is overwritten inbetween, don't optimize. */ @@ -47,19 +45,18 @@ BEGIN_TEST(optimizer_postRA.vcmp) //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0] //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec //! s2: %f:vcc = s_mov_b64 0 - //! s2: %e:s[2-3] = p_cbranch_z %d:scc - //! p_unit_test 1, %e:s[2-3], %f:vcc + //! p_cbranch_z %d:scc + //! p_unit_test 1, %f:vcc auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(), Operand(v_in, reg_v0)); auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm)); auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, vcc), Operand::zero()); - auto br = - bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp())); - writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc)); + bld.branch(aco_opcode::p_cbranch_z, bld.scc(sand.def(1).getTemp())); + writeout(1, Operand(ovrwr, vcc)); } - //; del b, c, d, e, f + //; del b, c, d, f { /* When part of VCC is overwritten inbetween, don't optimize. */ @@ -67,55 +64,52 @@ BEGIN_TEST(optimizer_postRA.vcmp) //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0] //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec //! s1: %f:vcc_hi = s_mov_b32 0 - //! s2: %e:s[2-3] = p_cbranch_z %d:scc - //! p_unit_test 1, %e:s[2-3], %f:vcc_hi + //! p_cbranch_z %d:scc + //! p_unit_test 1, %f:vcc_hi auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(), Operand(v_in, reg_v0)); auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm)); auto ovrwr = bld.sop1(aco_opcode::s_mov_b32, bld.def(s1, vcc_hi), Operand::zero()); - auto br = - bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp())); - writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc_hi)); + bld.branch(aco_opcode::p_cbranch_z, bld.scc(sand.def(1).getTemp())); + writeout(1, Operand(ovrwr, vcc_hi)); } - //; del b, c, d, e, f + //; del b, c, d, f { /* When the result of VOPC goes to an SGPR pair other than VCC, don't optimize */ //! s2: %b:s[4-5] = v_cmp_eq_u32 0, %a:v[0] //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:s[4-5], %x:exec - //! s2: %e:s[2-3] = p_cbranch_z %d:scc - //! p_unit_test 2, %e:s[2-3] + //! p_cbranch_z %d:scc + //! p_unit_test 2 auto vcmp = bld.vopc_e64(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, reg_s4), Operand::zero(), Operand(v_in, reg_v0)); auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), Operand(vcmp, reg_s4), Operand(exec, bld.lm)); - auto br = - bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp())); - writeout(2, Operand(br, reg_s2)); + bld.branch(aco_opcode::p_cbranch_z, bld.scc(sand.def(1).getTemp())); + writeout(2); } - //; del b, c, d, e + //; del b, c, d { /* When the VCC isn't written by VOPC, don't optimize */ //! s2: %b:vcc, s1: %f:scc = s_or_b64 1, %0:s[4-5] //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec - //! s2: %e:s[2-3] = p_cbranch_z %d:scc - //! p_unit_test 2, %e:s[2-3] + //! p_cbranch_z %d:scc + //! p_unit_test 2 auto salu = bld.sop2(Builder::s_or, bld.def(bld.lm, vcc), bld.def(s1, scc), Operand::c32(1u), Operand(reg_s4, bld.lm)); auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), Operand(salu, vcc), Operand(exec, bld.lm)); - auto br = - bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp())); - writeout(2, Operand(br, reg_s2)); + bld.branch(aco_opcode::p_cbranch_z, bld.scc(sand.def(1).getTemp())); + writeout(2); } - //; del b, c, d, e, f, x + //; del b, c, d, f, x { /* When EXEC is overwritten inbetween, don't optimize. */ @@ -123,19 +117,18 @@ BEGIN_TEST(optimizer_postRA.vcmp) //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0] //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec //! s2: %f:exec = s_mov_b64 42 - //! s2: %e:s[2-3] = p_cbranch_z %d:scc - //! p_unit_test 4, %e:s[2-3], %f:exec + //! p_cbranch_z %d:scc + //! p_unit_test 4, %f:exec auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(), Operand(v_in, reg_v0)); auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm)); auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, exec), Operand::c32(42u)); - auto br = - bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp())); - writeout(4, Operand(br, reg_s2), Operand(ovrwr, exec)); + bld.branch(aco_opcode::p_cbranch_z, bld.scc(sand.def(1).getTemp())); + writeout(4, Operand(ovrwr, exec)); } - //; del b, c, d, e, f, x + //; del b, c, d, f, x finish_optimizer_postRA_test(); END_TEST @@ -164,73 +157,73 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_opt) { //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 - //! s2: %f:vcc = p_cbranch_nz %e:scc - //! p_unit_test 0, %f:vcc + //! p_cbranch_nz %e:scc + //! p_unit_test 0 auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, Operand::c32(0x40018u)); auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), Operand::zero()); - auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp)); - writeout(0, Operand(br, vcc)); + bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp)); + writeout(0); } - //; del d, e, f + //; del d, e { //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 - //! s2: %f:vcc = p_cbranch_z %e:scc - //! p_unit_test 1, %f:vcc + //! p_cbranch_z %e:scc + //! p_unit_test 1 auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, Operand::c32(0x40018u)); auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2), Operand::zero()); - auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp)); - writeout(1, Operand(br, vcc)); + bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp)); + writeout(1); } - //; del d, e, f + //; del d, e { //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 - //! s2: %f:vcc = p_cbranch_z %e:scc - //! p_unit_test 2, %f:vcc + //! p_cbranch_z %e:scc + //! p_unit_test 2 auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, Operand::c32(0x40018u)); auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), Operand::zero()); - auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp)); - writeout(2, Operand(br, vcc)); + bld.branch(aco_opcode::p_cbranch_nz, bld.scc(scmp)); + writeout(2); } - //; del d, e, f + //; del d, e { //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 - //! s2: %f:vcc = p_cbranch_nz %e:scc - //! p_unit_test 3, %f:vcc + //! p_cbranch_nz %e:scc + //! p_unit_test 3 auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, Operand::c32(0x40018u)); auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2), Operand::zero()); - auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp)); - writeout(3, Operand(br, vcc)); + bld.branch(aco_opcode::p_cbranch_nz, bld.scc(scmp)); + writeout(3); } - //; del d, e, f + //; del d, e { //! s2: %d:s[2-3], s1: %e:scc = s_and_b64 %y:s[4-5], 0x12345 - //! s2: %f:vcc = p_cbranch_z %e:scc - //! p_unit_test 4, %f:vcc + //! p_cbranch_z %e:scc + //! p_unit_test 4 auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s2), bld.def(s1, scc), op_in_1, Operand::c32(0x12345u)); auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u64, bld.def(s1, scc), Operand(salu, reg_s2), Operand::zero(8)); - auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp)); - writeout(4, Operand(br, vcc)); + bld.branch(aco_opcode::p_cbranch_nz, bld.scc(scmp)); + writeout(4); } - //; del d, e, f + //; del d, e { /* SCC is overwritten in between, don't optimize */ @@ -238,57 +231,57 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_opt) //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1 //! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0 - //! s2: %f:vcc = p_cbranch_z %g:scc - //! p_unit_test 5, %f:vcc, %h:s[3] + //! p_cbranch_z %g:scc + //! p_unit_test 5, %h:s[3] auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, Operand::c32(0x40018u)); auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0, Operand::c32(1u)); auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), Operand::zero()); - auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp)); - writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3)); + bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp)); + writeout(5, Operand(ovrw, reg_s3)); } - //; del d, e, f, g, h, x + //; del d, e, g, h, x { /* SCC is overwritten in between, optimize by pulling down */ //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1 //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 - //! s2: %f:vcc = p_cbranch_z %g:scc - //! p_unit_test 5, %f:vcc, %h:s[3] + //! p_cbranch_z %g:scc + //! p_unit_test 5, %h:s[3] auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, Operand::c32(0x40018u)); auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0, Operand::c32(1u)); auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2), Operand::zero()); - auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp)); - writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3)); + bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp)); + writeout(5, Operand(ovrw, reg_s3)); } - //; del d, e, f, g, h, x + //; del d, e, g, h, x { /* SCC is overwritten in between, optimize by pulling down */ //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1 //! s2: %d:s[8-9], s1: %e:scc = s_and_b64 %b:s[4-5], 0x40018 - //! s2: %f:vcc = p_cbranch_z %g:scc - //! p_unit_test 5, %f:vcc, %h:s[3] + //! p_cbranch_z %g:scc + //! p_unit_test 5, %h:s[3] auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s8), bld.def(s1, scc), op_in_1, Operand::c32(0x40018u)); auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0, Operand::c32(1u)); auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u64, bld.def(s1, scc), Operand(salu, reg_s8), Operand::zero()); - auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp)); - writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3)); + bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp)); + writeout(5, Operand(ovrw, reg_s3)); } - //; del d, e, f, g, h, x + //; del d, e, g, h, x { //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 @@ -541,7 +534,7 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf) Temp dpp_tmp = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v12), a, dpp_row_mirror); //! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec - //! s2: %0:vcc = p_cbranch_nz BB1, BB2 + //! p_cbranch_nz BB1, BB2 emit_divergent_if_else( program.get(), bld, e, @@ -562,18 +555,18 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf) writeout(10, Operand(result, reg_v12)); //! p_logical_end - //! s2: %0:vcc = p_branch BB3 + //! p_branch BB3 /* --- linear then --- */ //! BB2 //! /* logical preds: / linear preds: BB0, / kind: */ - //! s2: %0:vcc = p_branch BB3 + //! p_branch BB3 /* --- invert --- */ //! BB3 //! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */ //! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec - //! s2: %0:vcc = p_cbranch_nz BB4, BB5 + //! p_cbranch_nz BB4, BB5 }, [&]() -> void { @@ -582,12 +575,12 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf) //! /* logical preds: BB0, / linear preds: BB3, / kind: */ //! p_logical_start //! p_logical_end - //! s2: %0:vcc = p_branch BB6 + //! p_branch BB6 /* --- linear else --- */ //! BB5 //! /* logical preds: / linear preds: BB3, / kind: */ - //! s2: %0:vcc = p_branch BB6 + //! p_branch BB6 }); /* --- merge block --- */ @@ -623,7 +616,7 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf_overwritten) Temp dpp_tmp = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v12), a, dpp_row_mirror); //! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec - //! s2: %0:vcc = p_cbranch_nz BB1, BB2 + //! p_cbranch_nz BB1, BB2 emit_divergent_if_else( program.get(), bld, e, @@ -642,18 +635,18 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf_overwritten) d, 0, true); //! p_logical_end - //! s2: %0:vcc = p_branch BB3 + //! p_branch BB3 /* --- linear then --- */ //! BB2 //! /* logical preds: / linear preds: BB0, / kind: */ - //! s2: %0:vcc = p_branch BB3 + //! p_branch BB3 /* --- invert --- */ //! BB3 //! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */ //! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec - //! s2: %0:vcc = p_cbranch_nz BB4, BB5 + //! p_cbranch_nz BB4, BB5 }, [&]() -> void { @@ -662,12 +655,12 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf_overwritten) //! /* logical preds: BB0, / linear preds: BB3, / kind: */ //! p_logical_start //! p_logical_end - //! s2: %0:vcc = p_branch BB6 + //! p_branch BB6 /* --- linear else --- */ //! BB5 //! /* logical preds: / linear preds: BB3, / kind: */ - //! s2: %0:vcc = p_branch BB6 + //! p_branch BB6 }); /* --- merge block --- */ @@ -703,7 +696,7 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf_linear_clobber) Temp dpp_tmp = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v12), a, dpp_row_mirror); //! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %c:s[0-1], %0:exec - //! s2: %0:vcc = p_cbranch_nz BB1, BB2 + //! p_cbranch_nz BB1, BB2 emit_divergent_if_else( program.get(), bld, c, @@ -722,18 +715,18 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf_linear_clobber) writeout(0, Operand(clobber, a.physReg())); //! p_logical_end - //! s2: %0:vcc = p_branch BB3 + //! p_branch BB3 /* --- linear then --- */ //! BB2 //! /* logical preds: / linear preds: BB0, / kind: */ - //! s2: %0:vcc = p_branch BB3 + //! p_branch BB3 /* --- invert --- */ //! BB3 //! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */ //! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec - //! s2: %0:vcc = p_cbranch_nz BB4, BB5 + //! p_cbranch_nz BB4, BB5 }, [&]() -> void { @@ -749,12 +742,12 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf_linear_clobber) writeout(1, Operand(result, reg_v12)); //! p_logical_end - //! s2: %0:vcc = p_branch BB6 + //! p_branch BB6 /* --- linear else --- */ //! BB5 //! /* logical preds: / linear preds: BB3, / kind: */ - //! s2: %0:vcc = p_branch BB6 + //! p_branch BB6 }); /* --- merge block --- */ @@ -788,7 +781,7 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf) Operand::c32(0x40018u)); //! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec - //! s2: %0:vcc = p_cbranch_nz BB1, BB2 + //! p_cbranch_nz BB1, BB2 emit_divergent_if_else( program.get(), bld, e, @@ -803,18 +796,18 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf) bld.mubuf(aco_opcode::buffer_store_dword, f, c, Operand::zero(), d, 0, true); //! p_logical_end - //! s2: %0:vcc = p_branch BB3 + //! p_branch BB3 /* --- linear then --- */ //! BB2 //! /* logical preds: / linear preds: BB0, / kind: */ - //! s2: %0:vcc = p_branch BB3 + //! p_branch BB3 /* --- invert --- */ //! BB3 //! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */ //! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec - //! s2: %0:vcc = p_cbranch_nz BB4, BB5 + //! p_cbranch_nz BB4, BB5 }, [&]() -> void { @@ -823,12 +816,12 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf) //! /* logical preds: BB0, / linear preds: BB3, / kind: */ //! p_logical_start //! p_logical_end - //! s2: %0:vcc = p_branch BB6 + //! p_branch BB6 /* --- linear else --- */ //! BB5 //! /* logical preds: / linear preds: BB3, / kind: */ - //! s2: %0:vcc = p_branch BB6 + //! p_branch BB6 }); /* --- merge block --- */ @@ -837,12 +830,12 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf) //! s2: %0:exec = p_parallelcopy %saved_exec:s[84-85] //! s2: %tmp_salu:s[8-9], s1: %br_scc:scc = s_and_b64 %a:s[2-3], 0x40018 - //! s2: %br_vcc:vcc = p_cbranch_z %br_scc:scc - //! p_unit_test 5, %br_vcc:vcc + //! p_cbranch_z %br_scc:scc + //! p_unit_test 5 auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u64, bld.def(s1, scc), Operand(tmp_salu, reg_s8), Operand::zero()); - auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp)); - writeout(5, Operand(br, vcc)); + bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp)); + writeout(5); finish_optimizer_postRA_test(); END_TEST @@ -874,7 +867,7 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf_partially_overwritten) Operand::c32(0x40018u)); //! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec - //! s2: %0:vcc = p_cbranch_nz BB1, BB2 + //! p_cbranch_nz BB1, BB2 emit_divergent_if_else( program.get(), bld, e, @@ -892,18 +885,18 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf_partially_overwritten) bld.mubuf(aco_opcode::buffer_store_dword, g, c, Operand(s_addr, reg_s3), d, 0, true); //! p_logical_end - //! s2: %0:vcc = p_branch BB3 + //! p_branch BB3 /* --- linear then --- */ //! BB2 //! /* logical preds: / linear preds: BB0, / kind: */ - //! s2: %0:vcc = p_branch BB3 + //! p_branch BB3 /* --- invert --- */ //! BB3 //! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */ //! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec - //! s2: %0:vcc = p_cbranch_nz BB4, BB5 + //! p_cbranch_nz BB4, BB5 }, [&]() -> void { @@ -912,12 +905,12 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf_partially_overwritten) //! /* logical preds: BB0, / linear preds: BB3, / kind: */ //! p_logical_start //! p_logical_end - //! s2: %0:vcc = p_branch BB6 + //! p_branch BB6 /* --- linear else --- */ //! BB5 //! /* logical preds: / linear preds: BB3, / kind: */ - //! s2: %0:vcc = p_branch BB6 + //! p_branch BB6 }); /* --- merge block --- */ @@ -926,12 +919,12 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf_partially_overwritten) //! s2: %0:exec = p_parallelcopy %saved_exec:s[84-85] //! s1: %br_scc:scc = s_cmp_lg_u64 %tmp_salu:s[8-9], 0 - //! s2: %br_vcc:vcc = p_cbranch_z %br_scc:scc - //! p_unit_test 5, %br_vcc:vcc + //! p_cbranch_z %br_scc:scc + //! p_unit_test 5 auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u64, bld.def(s1, scc), Operand(tmp_salu, reg_s8), Operand::zero()); - auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp)); - writeout(5, Operand(br, vcc)); + bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp)); + writeout(5); finish_optimizer_postRA_test(); END_TEST diff --git a/src/amd/compiler/tests/test_regalloc.cpp b/src/amd/compiler/tests/test_regalloc.cpp index 17dba8c2c53..7d3cb60a316 100644 --- a/src/amd/compiler/tests/test_regalloc.cpp +++ b/src/amd/compiler/tests/test_regalloc.cpp @@ -179,8 +179,8 @@ BEGIN_TEST(regalloc.branch_def_phis_at_merge_block) program->blocks[0].kind &= ~block_kind_top_level; - //! s2: %_:s[2-3] = p_branch - bld.branch(aco_opcode::p_branch, bld.def(s2)); + //! p_branch + bld.branch(aco_opcode::p_branch); //! BB1 //! /* logical preds: / linear preds: BB0, / kind: uniform, */ @@ -204,8 +204,8 @@ BEGIN_TEST(regalloc.branch_def_phis_at_branch_block) //! s2: %tmp:s[0-1] = p_unit_test Temp tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(s2)); - //! s2: %_:s[2-3] = p_cbranch_z %0:scc - bld.branch(aco_opcode::p_cbranch_z, bld.def(s2), Operand(scc, s1)); + //! p_cbranch_z %0:scc + bld.branch(aco_opcode::p_cbranch_z, Operand(scc, s1)); //! BB1 //! /* logical preds: / linear preds: BB0, / kind: */ @@ -214,12 +214,12 @@ BEGIN_TEST(regalloc.branch_def_phis_at_branch_block) //! p_unit_test %tmp:s[0-1] bld.pseudo(aco_opcode::p_unit_test, tmp); - bld.branch(aco_opcode::p_branch, bld.def(s2)); + bld.branch(aco_opcode::p_branch); bld.reset(program->create_and_insert_block()); program->blocks[2].linear_preds.push_back(0); - bld.branch(aco_opcode::p_branch, bld.def(s2)); + bld.branch(aco_opcode::p_branch); bld.reset(program->create_and_insert_block()); program->blocks[3].linear_preds.push_back(1); @@ -522,12 +522,12 @@ BEGIN_TEST(regalloc.linear_vgpr.compact_for_future_def) Temp scc_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(s1, scc)); //! lv1: %ltmp2_2:v[29] = p_parallelcopy %ltmp2:v[28] - //~gfx8_cbranch! s2: %_:s[0-1] = p_cbranch_z %scc_tmp:scc - //~gfx8_branch! s2: %_:s[0-1] = p_branch + //~gfx8_cbranch! p_cbranch_z %scc_tmp:scc + //~gfx8_branch! p_branch if (cbr) - bld.branch(aco_opcode::p_cbranch_z, bld.def(s2), bld.scc(scc_tmp)); + bld.branch(aco_opcode::p_cbranch_z, bld.scc(scc_tmp)); else - bld.branch(aco_opcode::p_branch, bld.def(s2)); + bld.branch(aco_opcode::p_branch); //! BB1 //! /* logical preds: BB0, / linear preds: BB0, / kind: */ @@ -536,9 +536,9 @@ BEGIN_TEST(regalloc.linear_vgpr.compact_for_future_def) program->blocks[1].logical_preds.push_back(0); //! v29: %_:v[0-28] = p_unit_test - //! s2: %_:s[0-1] = p_branch + //! p_branch bld.pseudo(aco_opcode::p_unit_test, bld.def(RegClass::get(RegType::vgpr, 29 * 4))); - bld.branch(aco_opcode::p_branch, bld.def(s2)); + bld.branch(aco_opcode::p_branch); //! BB2 //! /* logical preds: BB1, / linear preds: BB1, / kind: uniform, top-level, */ @@ -554,7 +554,7 @@ BEGIN_TEST(regalloc.linear_vgpr.compact_for_future_def) finish_ra_test(ra_test_policy()); - //~gfx8_cbranch>> lv1: %ltmp2_2:v[29] = p_parallelcopy %ltmp2:v[28] needs_scratch:1 scratch:s1 + //~gfx8_cbranch>> lv1: %ltmp2_2:v[29] = p_parallelcopy %ltmp2:v[28] needs_scratch:1 scratch:s0 //~gfx8_branch>> lv1: %ltmp2_2:v[29] = p_parallelcopy %ltmp2:v[28] needs_scratch:1 scratch:s253 aco_ptr& parallelcopy = program->blocks[0].instructions[6]; aco_print_instr(program->gfx_level, parallelcopy.get(), output); @@ -584,12 +584,12 @@ BEGIN_TEST(regalloc.linear_vgpr.compact_for_future_phis) end_linear_vgpr(ltmp1); //! lv1: %ltmp2_2:v[30] = p_parallelcopy %ltmp2:v[29] - //~gfx8_cbranch! s2: %_:s[0-1] = p_cbranch_z %_:scc - //~gfx8_branch! s2: %_:s[0-1] = p_branch + //~gfx8_cbranch! p_cbranch_z %_:scc + //~gfx8_branch! p_branch if (cbr) - bld.branch(aco_opcode::p_cbranch_z, bld.def(s2), Operand(scc, s1)); + bld.branch(aco_opcode::p_cbranch_z, Operand(scc, s1)); else - bld.branch(aco_opcode::p_branch, bld.def(s2)); + bld.branch(aco_opcode::p_branch); //! BB1 //! /* logical preds: BB0, / linear preds: BB0, / kind: */ @@ -597,8 +597,8 @@ BEGIN_TEST(regalloc.linear_vgpr.compact_for_future_phis) program->blocks[1].linear_preds.push_back(0); program->blocks[1].logical_preds.push_back(0); - //! s2: %_:s[0-1] = p_branch - bld.branch(aco_opcode::p_branch, bld.def(s2)); + //! p_branch + bld.branch(aco_opcode::p_branch); //! BB2 //! /* logical preds: BB1, / linear preds: BB1, / kind: uniform, top-level, */