aco: remove definition from Pseudo branch instructions

The definitions on Pseudo branch instructions are no longer needed.

Totals from 7019 (8.84% of 79395) affected shaders: (Navi31)

Instrs: 14805400 -> 14824196 (+0.13%); split: -0.00%, +0.13%
CodeSize: 78079972 -> 78132932 (+0.07%); split: -0.01%, +0.08%
SpillSGPRs: 4485 -> 4515 (+0.67%); split: -0.76%, +1.43%
Latency: 165862000 -> 165836134 (-0.02%); split: -0.02%, +0.00%
InvThroughput: 30061764 -> 30057781 (-0.01%); split: -0.01%, +0.00%
SClause: 392323 -> 392286 (-0.01%); split: -0.01%, +0.00%
Copies: 1012262 -> 1012234 (-0.00%); split: -0.04%, +0.04%
Branches: 365910 -> 365909 (-0.00%); split: -0.00%, +0.00%
PreSGPRs: 360167 -> 355363 (-1.33%)
VALU: 8837197 -> 8837276 (+0.00%); split: -0.00%, +0.00%
SALU: 1402593 -> 1402621 (+0.00%); split: -0.03%, +0.03%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32037>
This commit is contained in:
Daniel Schürmann
2024-11-07 09:42:04 +01:00
committed by Marge Bot
parent 7e4687fd04
commit b64fff7731
11 changed files with 188 additions and 243 deletions
+1 -1
View File
@@ -564,7 +564,7 @@ formats = [("pseudo", [Format.PSEUDO], list(itertools.product(range(5), range(6)
("mtbuf", [Format.MTBUF], [(0, 4), (1, 3)]),
("mimg", [Format.MIMG], itertools.product([0, 1], [3, 4, 5, 6, 7])),
("exp", [Format.EXP], [(0, 4), (0, 5)]),
("branch", [Format.PSEUDO_BRANCH], itertools.product([1], [0, 1])),
("branch", [Format.PSEUDO_BRANCH], [(0, 0), (0, 1)]),
("barrier", [Format.PSEUDO_BARRIER], [(0, 0)]),
("reduction", [Format.PSEUDO_REDUCTION], [(3, 3)]),
("vop1", [Format.VOP1], [(0, 0), (1, 1), (1, 2), (2, 2)]),
+6 -6
View File
@@ -637,8 +637,8 @@ add_branch_code(exec_ctx& ctx, Block* block)
if (need_parallelcopy)
bld.copy(Definition(exec, bld.lm), ctx.info[idx].exec.back().op);
bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2), Operand(exec, bld.lm),
block->linear_succs[1], block->linear_succs[0]);
bld.branch(aco_opcode::p_cbranch_nz, Operand(exec, bld.lm), block->linear_succs[1],
block->linear_succs[0]);
} else if (block->kind & block_kind_uniform) {
Pseudo_branch_instruction& branch = block->instructions.back()->branch();
if (branch.opcode == aco_opcode::p_branch) {
@@ -671,7 +671,7 @@ add_branch_code(exec_ctx& ctx, Block* block)
/* add next current exec to the stack */
ctx.info[idx].exec.emplace_back(Operand(exec, bld.lm), mask_type);
Builder::Result r = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2), Operand(exec, bld.lm),
Builder::Result r = bld.branch(aco_opcode::p_cbranch_z, Operand(exec, bld.lm),
block->linear_succs[1], block->linear_succs[0]);
r->branch().rarely_taken = branch->branch().rarely_taken;
r->branch().never_taken = branch->branch().never_taken;
@@ -685,7 +685,7 @@ add_branch_code(exec_ctx& ctx, Block* block)
bld.sop2(Builder::s_andn2, Definition(exec, bld.lm), bld.def(s1, scc), orig_exec,
Operand(exec, bld.lm));
Builder::Result r = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2), Operand(exec, bld.lm),
Builder::Result r = bld.branch(aco_opcode::p_cbranch_z, Operand(exec, bld.lm),
block->linear_succs[1], block->linear_succs[0]);
r->branch().rarely_taken = branch->branch().rarely_taken;
r->branch().never_taken = branch->branch().never_taken;
@@ -713,7 +713,7 @@ add_branch_code(exec_ctx& ctx, Block* block)
bld.copy(Definition(exec, bld.lm), Operand::zero(bld.lm.bytes()));
}
bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2), bld.scc(cond), block->linear_succs[1],
bld.branch(aco_opcode::p_cbranch_nz, bld.scc(cond), block->linear_succs[1],
block->linear_succs[0]);
} else if (block->kind & block_kind_continue) {
assert(block->instructions.back()->opcode == aco_opcode::p_branch);
@@ -739,7 +739,7 @@ add_branch_code(exec_ctx& ctx, Block* block)
bld.copy(Definition(exec, bld.lm), Operand::zero(bld.lm.bytes()));
}
bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2), bld.scc(cond), block->linear_succs[1],
bld.branch(aco_opcode::p_cbranch_nz, bld.scc(cond), block->linear_succs[1],
block->linear_succs[0]);
} else {
unreachable("unknown/invalid block type");
+17 -26
View File
@@ -10125,7 +10125,7 @@ begin_loop(isel_context* ctx, loop_context* lc)
append_logical_end(ctx->block);
ctx->block->kind |= block_kind_loop_preheader | block_kind_uniform;
Builder bld(ctx->program, ctx->block);
bld.branch(aco_opcode::p_branch, bld.def(s2));
bld.branch(aco_opcode::p_branch);
unsigned loop_preheader_idx = ctx->block->index;
lc->loop_exit.kind |= (block_kind_loop_exit | (ctx->block->kind & block_kind_top_level));
@@ -10201,14 +10201,14 @@ end_loop(isel_context* ctx, loop_context* lc)
Block* break_block = ctx->program->create_and_insert_block();
break_block->kind = block_kind_uniform;
bld.reset(break_block);
bld.branch(aco_opcode::p_branch, bld.def(s2));
bld.branch(aco_opcode::p_branch);
add_linear_edge(block_idx, break_block);
add_linear_edge(break_block->index, &lc->loop_exit);
Block* continue_block = ctx->program->create_and_insert_block();
continue_block->kind = block_kind_uniform;
bld.reset(continue_block);
bld.branch(aco_opcode::p_branch, bld.def(s2));
bld.branch(aco_opcode::p_branch);
add_linear_edge(block_idx, continue_block);
add_linear_edge(continue_block->index, &ctx->program->blocks[loop_header_idx]);
@@ -10227,7 +10227,7 @@ end_loop(isel_context* ctx, loop_context* lc)
}
bld.reset(ctx->block);
bld.branch(aco_opcode::p_branch, bld.def(s2));
bld.branch(aco_opcode::p_branch);
}
ctx->cf_info.has_branch = false;
@@ -10271,7 +10271,7 @@ emit_loop_jump(isel_context* ctx, bool is_break)
/* uniform break - directly jump out of the loop */
ctx->block->kind |= block_kind_uniform;
ctx->cf_info.has_branch = true;
bld.branch(aco_opcode::p_branch, bld.def(s2));
bld.branch(aco_opcode::p_branch);
add_linear_edge(idx, logical_target);
return;
}
@@ -10290,7 +10290,7 @@ emit_loop_jump(isel_context* ctx, bool is_break)
/* uniform continue - directly jump to the loop header */
ctx->block->kind |= block_kind_uniform;
ctx->cf_info.has_branch = true;
bld.branch(aco_opcode::p_branch, bld.def(s2));
bld.branch(aco_opcode::p_branch);
add_linear_edge(idx, logical_target);
return;
}
@@ -10310,7 +10310,7 @@ emit_loop_jump(isel_context* ctx, bool is_break)
}
/* remove critical edges from linear CFG */
bld.branch(aco_opcode::p_branch, bld.def(s2));
bld.branch(aco_opcode::p_branch);
Block* break_block = ctx->program->create_and_insert_block();
break_block->kind |= block_kind_uniform;
add_linear_edge(idx, break_block);
@@ -10319,7 +10319,7 @@ emit_loop_jump(isel_context* ctx, bool is_break)
logical_target = &ctx->program->blocks[ctx->cf_info.parent_loop.header_idx];
add_linear_edge(break_block->index, logical_target);
bld.reset(break_block);
bld.branch(aco_opcode::p_branch, bld.def(s2));
bld.branch(aco_opcode::p_branch);
Block* continue_block = ctx->program->create_and_insert_block();
add_linear_edge(idx, continue_block);
@@ -10436,8 +10436,7 @@ begin_divergent_if_then(isel_context* ctx, if_context* ic, Temp cond,
/* branch to linear then block */
assert(cond.regClass() == ctx->program->lane_mask);
aco_ptr<Instruction> branch;
branch.reset(create_instruction(aco_opcode::p_cbranch_z, Format::PSEUDO_BRANCH, 1, 1));
branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
branch.reset(create_instruction(aco_opcode::p_cbranch_z, Format::PSEUDO_BRANCH, 1, 0));
branch->operands[0] = Operand(cond);
bool never_taken =
sel_ctrl == nir_selection_control_divergent_always_taken &&
@@ -10479,8 +10478,7 @@ begin_divergent_if_else(isel_context* ctx, if_context* ic,
append_logical_end(BB_then_logical);
/* branch from logical then block to invert block */
aco_ptr<Instruction> branch;
branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1));
branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
BB_then_logical->instructions.emplace_back(std::move(branch));
add_linear_edge(BB_then_logical->index, &ic->BB_invert);
if (!ctx->cf_info.parent_loop.has_divergent_branch)
@@ -10495,8 +10493,7 @@ begin_divergent_if_else(isel_context* ctx, if_context* ic,
BB_then_linear->kind |= block_kind_uniform;
add_linear_edge(ic->BB_if_idx, BB_then_linear);
/* branch from linear then block to invert block */
branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1));
branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
BB_then_linear->instructions.emplace_back(std::move(branch));
add_linear_edge(BB_then_linear->index, &ic->BB_invert);
@@ -10505,8 +10502,7 @@ begin_divergent_if_else(isel_context* ctx, if_context* ic,
ic->invert_idx = ctx->block->index;
/* branch to linear else block (skip else) */
branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1));
branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
bool never_taken =
sel_ctrl == nir_selection_control_divergent_always_taken &&
!(ctx->cf_info.exec.potentially_empty_discard || ctx->cf_info.exec.potentially_empty_break ||
@@ -10539,8 +10535,7 @@ end_divergent_if(isel_context* ctx, if_context* ic)
/* branch from logical else block to endif block */
aco_ptr<Instruction> branch;
branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1));
branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
BB_else_logical->instructions.emplace_back(std::move(branch));
add_linear_edge(BB_else_logical->index, &ic->BB_endif);
if (!ctx->cf_info.parent_loop.has_divergent_branch)
@@ -10557,8 +10552,7 @@ end_divergent_if(isel_context* ctx, if_context* ic)
add_linear_edge(ic->invert_idx, BB_else_linear);
/* branch from linear else block to endif block */
branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1));
branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
BB_else_linear->instructions.emplace_back(std::move(branch));
add_linear_edge(BB_else_linear->index, &ic->BB_endif);
@@ -10587,8 +10581,7 @@ begin_uniform_if_then(isel_context* ctx, if_context* ic, Temp cond)
aco_ptr<Instruction> branch;
aco_opcode branch_opcode = aco_opcode::p_cbranch_z;
branch.reset(create_instruction(branch_opcode, Format::PSEUDO_BRANCH, 1, 1));
branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
branch.reset(create_instruction(branch_opcode, Format::PSEUDO_BRANCH, 1, 0));
if (cond.id()) {
branch->operands[0] = Operand(cond);
branch->operands[0].setPrecolored(scc);
@@ -10626,8 +10619,7 @@ begin_uniform_if_else(isel_context* ctx, if_context* ic, bool logical_else)
append_logical_end(BB_then);
/* branch from then block to endif block */
aco_ptr<Instruction> branch;
branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1));
branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
BB_then->instructions.emplace_back(std::move(branch));
add_linear_edge(BB_then->index, &ic->BB_endif);
if (!ctx->cf_info.parent_loop.has_divergent_branch)
@@ -10665,8 +10657,7 @@ end_uniform_if(isel_context* ctx, if_context* ic, bool logical_else)
append_logical_end(BB_else);
/* branch from else block to endif block */
aco_ptr<Instruction> branch;
branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 1));
branch->definitions[0] = Definition(ctx->program->allocateTmp(s2));
branch.reset(create_instruction(aco_opcode::p_branch, Format::PSEUDO_BRANCH, 0, 0));
BB_else->instructions.emplace_back(std::move(branch));
add_linear_edge(BB_else->index, &ic->BB_endif);
if (logical_else && !ctx->cf_info.parent_loop.has_divergent_branch)
-2
View File
@@ -322,8 +322,6 @@ try_merge_break_with_continue(jump_threading_ctx& ctx, Block* block)
merge->instructions.back()->branch().target[0] = merge->index;
std::swap(merge->instructions.back(), block->instructions.back());
std::swap(merge->instructions.back()->definitions[0],
block->instructions.back()->definitions[0]);
block->linear_succs.clear();
block->linear_succs.push_back(merge->index);
@@ -352,7 +352,6 @@ process_live_temps_per_block(live_ctx& ctx, Block* block)
}
block->live_in_demand = new_demand;
block->live_in_demand.sgpr += 2; /* Add 2 SGPRs for potential long-jumps. */
block->register_demand.update(block->live_in_demand);
ctx.program->max_reg_demand.update(block->register_demand);
ctx.handled_once = std::min(ctx.handled_once, block->index);
@@ -3063,38 +3063,12 @@ register_allocation(Program* program, ra_test_policy policy)
get_regs_for_phis(ctx, block, register_file, instructions,
program->live.live_in[block.index]);
/* If this is a merge block, the state of the register file at the branch instruction of the
* predecessors corresponds to the state after phis at the merge block. So, we allocate a
* register for the predecessor's branch definitions as if there was a phi.
*/
if (!block.linear_preds.empty() &&
(block.linear_preds.size() != 1 ||
program->blocks[block.linear_preds[0]].linear_succs.size() == 1)) {
PhysReg br_reg = get_reg_phi(ctx, program->live.live_in[block.index], register_file,
instructions, block, ctx.phi_dummy, Temp(0, s2));
for (unsigned pred : block.linear_preds) {
aco_ptr<Instruction>& br = program->blocks[pred].instructions.back();
assert(br->definitions.size() == 1 && br->definitions[0].regClass() == s2 &&
br->definitions[0].isKill());
br->definitions[0].setFixed(br_reg);
}
}
/* Handle all other instructions of the block */
auto NonPhi = [](aco_ptr<Instruction>& instr) -> bool { return instr && !is_phi(instr); };
auto instr_it = std::find_if(block.instructions.begin(), block.instructions.end(), NonPhi);
for (; instr_it != block.instructions.end(); ++instr_it) {
aco_ptr<Instruction>& instr = *instr_it;
std::vector<std::pair<Operand, Definition>> parallelcopy;
if (instr->opcode == aco_opcode::p_branch) {
/* unconditional branches are handled after phis of the target */
instructions.emplace_back(std::move(instr));
break;
}
assert(!is_phi(instr));
/* handle operands */
+1 -9
View File
@@ -1515,8 +1515,7 @@ validate_ra(Program* program)
}
}
if (!instr->isBranch() || block.linear_succs.size() != 1)
err |= validate_instr_defs(program, regs, assignments, loc, instr);
err |= validate_instr_defs(program, regs, assignments, loc, instr);
if (!is_phi(instr)) {
for (const Operand& op : instr->operands) {
@@ -1527,13 +1526,6 @@ validate_ra(Program* program)
regs[op.physReg().reg_b + j] = 0;
}
}
} else if (block.linear_preds.size() != 1 ||
program->blocks[block.linear_preds[0]].linear_succs.size() == 1) {
for (unsigned pred : block.linear_preds) {
aco_ptr<Instruction>& br = program->blocks[pred].instructions.back();
assert(br->isBranch());
err |= validate_instr_defs(program, regs, assignments, loc, br);
}
}
}
}
+6 -8
View File
@@ -604,32 +604,30 @@ emit_divergent_if_else(Program* prog, aco::Builder& b, Operand cond, std::functi
b.reset(if_block);
Temp saved_exec = b.sop1(Builder::s_and_saveexec, b.def(b.lm, saved_exec_reg),
Definition(scc, s1), Definition(exec, b.lm), cond, Operand(exec, b.lm));
b.branch(aco_opcode::p_cbranch_nz, Definition(vcc, bld.lm), then_logical->index,
then_linear->index);
b.branch(aco_opcode::p_cbranch_nz, then_logical->index, then_linear->index);
b.reset(then_logical);
b.pseudo(aco_opcode::p_logical_start);
then();
b.pseudo(aco_opcode::p_logical_end);
b.branch(aco_opcode::p_branch, Definition(vcc, bld.lm), invert->index);
b.branch(aco_opcode::p_branch, invert->index);
b.reset(then_linear);
b.branch(aco_opcode::p_branch, Definition(vcc, bld.lm), invert->index);
b.branch(aco_opcode::p_branch, invert->index);
b.reset(invert);
b.sop2(Builder::s_andn2, Definition(exec, bld.lm), Definition(scc, s1),
Operand(saved_exec, saved_exec_reg), Operand(exec, bld.lm));
b.branch(aco_opcode::p_cbranch_nz, Definition(vcc, bld.lm), else_logical->index,
else_linear->index);
b.branch(aco_opcode::p_cbranch_nz, else_logical->index, else_linear->index);
b.reset(else_logical);
b.pseudo(aco_opcode::p_logical_start);
els();
b.pseudo(aco_opcode::p_logical_end);
b.branch(aco_opcode::p_branch, Definition(vcc, bld.lm), endif_block->index);
b.branch(aco_opcode::p_branch, endif_block->index);
b.reset(else_linear);
b.branch(aco_opcode::p_branch, Definition(vcc, bld.lm), endif_block->index);
b.branch(aco_opcode::p_branch, endif_block->index);
b.reset(endif_block);
b.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm),
+30 -30
View File
@@ -422,7 +422,7 @@ BEGIN_TEST(isel.cf.unreachable_break.uniform_continue)
//! s2: %zero = p_parallelcopy 0
//! s2: %_, s1: %cond:scc = s_and_b64 %zero, %0:exec
//! p_logical_end
//! s2: %_ = p_cbranch_z %cond:scc
//! p_cbranch_z %cond:scc
//! BB5
//! /* logical preds: BB4, / linear preds: BB4, / kind: uniform, break, */
//>> BB6
@@ -504,7 +504,7 @@ BEGIN_TEST(isel.cf.unreachable_continue.mixed_break)
//! p_logical_start
//! s2: %cond = p_unit_test 5
//! p_logical_end
//! s2: %_ = p_cbranch_z %cond
//! p_cbranch_z %cond
nir_push_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base=5));
{
//>> BB5
@@ -604,7 +604,7 @@ BEGIN_TEST(isel.cf.unreachable_continue.nested_mixed_break)
//! s2: %cond1 = p_unit_test 4
//! s2: %_, s1: %_:scc = s_and_b64 %cond1, %0:exec
//! p_logical_end
//! s2: %_ = p_cbranch_z %_:scc
//! p_cbranch_z %_:scc
nir_push_if(nb, nir_unit_test_uniform_amd(nb, 1, 1, .base=4));
{
//>> BB5
@@ -621,7 +621,7 @@ BEGIN_TEST(isel.cf.unreachable_continue.nested_mixed_break)
//! p_logical_start
//! s2: %cond2 = p_unit_test 5
//! p_logical_end
//! s2: %_ = p_cbranch_z %cond2
//! p_cbranch_z %cond2
nir_push_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base=5));
{
//>> BB8
@@ -677,7 +677,7 @@ BEGIN_TEST(isel.cf.unreachable_loop_exit)
//>> s1: %_ = p_unit_test 0
//>> s2: %zero = p_parallelcopy 0
//>> s2: %_, s1: %cond:scc = s_and_b64 %zero, %0:exec
//>> s2: %_ = p_cbranch_z %cond:scc
//>> p_cbranch_z %cond:scc
//! BB2
//! /* logical preds: BB1, / linear preds: BB1, / kind: uniform, break, */
//>> BB4
@@ -760,7 +760,7 @@ BEGIN_TEST(isel.cf.uniform_if_branch_use)
//>> s2: %cond = p_unit_test 2
//! s2: %_, s1: %_:scc = s_and_b64 %cond, %0:exec
//! p_logical_end
//! s2: %_ = p_cbranch_z %_:scc
//! p_cbranch_z %_:scc
nir_def *val;
nir_push_if(nb, nir_unit_test_uniform_amd(nb, 1, 1, .base=2));
{
@@ -773,7 +773,7 @@ BEGIN_TEST(isel.cf.uniform_if_branch_use)
/* The contents of this branch is moved to the merge block. */
//>> BB14
//! /* logical preds: BB13, / linear preds: BB12, BB13, / kind: uniform, */
//>> s2: %_ = p_cbranch_z %0:exec rarely_taken
//>> p_cbranch_z %0:exec rarely_taken
//! BB15
//! /* logical preds: BB14, / linear preds: BB14, / kind: uniform, */
//! p_logical_start
@@ -1165,7 +1165,7 @@ BEGIN_TEST(isel.cf.empty_exec.uniform_if)
//>> BB0
//>> s2: %_ = p_unit_test 0
//>> s2: %_ = p_cbranch_z %_:scc
//>> p_cbranch_z %_:scc
nir_push_if(nb, nir_unit_test_uniform_amd(nb, 1, 1, .base = 0));
{
//>> BB1
@@ -1205,26 +1205,26 @@ BEGIN_TEST(isel.cf.empty_exec.divergent_if)
//>> BB0
//>> s2: %_ = p_unit_test 0
//>> s2: %_ = p_cbranch_z %_
//>> p_cbranch_z %_
nir_push_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 0));
{
//>> BB1
//>> s2: %_ = p_unit_test 1
//>> p_discard_if %_
//>> s2: %_ = p_cbranch_z %0:exec rarely_taken
//>> p_cbranch_z %0:exec rarely_taken
nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 1));
//>> p_unit_test 2, %_
nir_unit_test_amd(nb, nir_undef(nb, 1, 32), .base = 2);
//>> s2: %_ = p_unit_test 3
//>> s2: %_ = p_cbranch_z %_
//>> p_cbranch_z %_
nir_push_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 3));
{
//>> p_unit_test 4, %_
//>> s2: %_ = p_unit_test 5
//>> p_discard_if %_
//>> s2: %_ = p_cbranch_z %0:exec rarely_taken
//>> p_cbranch_z %0:exec rarely_taken
nir_unit_test_amd(nb, nir_undef(nb, 1, 32), .base = 4);
nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 5));
@@ -1236,7 +1236,7 @@ BEGIN_TEST(isel.cf.empty_exec.divergent_if)
//>> p_unit_test 7, %_
//>> s2: %_ = p_unit_test 8
//>> p_discard_if %_
//>> s2: %_ = p_cbranch_z %0:exec rarely_taken
//>> p_cbranch_z %0:exec rarely_taken
nir_unit_test_amd(nb, nir_undef(nb, 1, 32), .base = 7);
nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 8));
@@ -1250,7 +1250,7 @@ BEGIN_TEST(isel.cf.empty_exec.divergent_if)
//>> /* logical preds: / linear preds: BB1, / kind: uniform, */
//>> BB16
//! /* logical preds: BB14, / linear preds: BB14, BB15, / kind: uniform, */
//>> s2: %35 = p_cbranch_z %0:exec rarely_taken
//>> p_cbranch_z %0:exec rarely_taken
//>> p_unit_test 10, %_
nir_unit_test_amd(nb, nir_undef(nb, 1, 32), .base = 10);
@@ -1284,7 +1284,7 @@ BEGIN_TEST(isel.cf.empty_exec.loop_terminate)
//>> p_unit_test 0, %_
//>> s2: %_ = p_unit_test 1
//>> p_discard_if %_
//>> s2: %_ = p_cbranch_z %0:exec rarely_taken
//>> p_cbranch_z %0:exec rarely_taken
nir_unit_test_amd(nb, nir_undef(nb, 1, 32), .base = 0);
nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 1));
@@ -1337,7 +1337,7 @@ BEGIN_TEST(isel.cf.empty_exec.loop_break)
//>> BB10
//! /* logical preds: BB8, / linear preds: BB8, BB9, / kind: uniform, merge, */
//>> s2: %_ = p_cbranch_z %0:exec rarely_taken
//>> p_cbranch_z %0:exec rarely_taken
//>> BB11
//>> p_unit_test 3, %_
nir_unit_test_amd(nb, nir_undef(nb, 1, 32), .base = 3);
@@ -1397,7 +1397,7 @@ BEGIN_TEST(isel.cf.empty_exec.loop_continue)
//>> BB13
//! /* logical preds: BB11, / linear preds: BB11, BB12, / kind: uniform, merge, */
//>> s2: %23 = p_cbranch_z %0:exec rarely_taken
//>> p_cbranch_z %0:exec rarely_taken
//>> BB14
//>> p_unit_test 3, %_
nir_unit_test_amd(nb, nir_undef(nb, 1, 32), .base = 3);
@@ -1463,7 +1463,7 @@ BEGIN_TEST(isel.cf.empty_exec.loop_continue_then_break)
nir_break_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 3));
//>> BB17
//! /* logical preds: BB15, / linear preds: BB15, BB16, / kind: uniform, merge, */
//>> s2: %_ = p_cbranch_z %0:exec rarely_taken
//>> p_cbranch_z %0:exec rarely_taken
//>> BB18
//! /* logical preds: BB17, / linear preds: BB17, / kind: uniform, */
@@ -1499,7 +1499,7 @@ BEGIN_TEST(isel.cf.empty_exec.terminate_then_uniform_if)
//>> BB0
//>> s2: %_ = p_unit_test 0
//>> s2: %_ = p_cbranch_z %_
//>> p_cbranch_z %_
nir_push_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 0));
{
//>> BB1
@@ -1508,9 +1508,9 @@ BEGIN_TEST(isel.cf.empty_exec.terminate_then_uniform_if)
//>> p_discard_if %_
nir_def* cond = nir_unit_test_uniform_amd(nb, 1, 1, .base = 1);
nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 2));
//>> s2: %_ = p_cbranch_z %0:exec rarely_taken
//>> p_cbranch_z %0:exec rarely_taken
//>> s2: %_ = p_cbranch_z %_:scc
//>> p_cbranch_z %_:scc
nir_push_if(nb, cond);
{
//>> p_unit_test 3, %2
@@ -1543,7 +1543,7 @@ BEGIN_TEST(isel.cf.empty_exec.terminate_then_divergent_if)
//>> BB0
//>> s2: %_ = p_unit_test 0
//>> s2: %_ = p_cbranch_z %_
//>> p_cbranch_z %_
nir_push_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 0));
{
//>> BB1
@@ -1552,9 +1552,9 @@ BEGIN_TEST(isel.cf.empty_exec.terminate_then_divergent_if)
//>> p_discard_if %_
nir_def* cond = nir_unit_test_divergent_amd(nb, 1, 1, .base = 1);
nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 2));
//>> s2: %_ = p_cbranch_z %0:exec rarely_taken
//>> p_cbranch_z %0:exec rarely_taken
//>> s2: %_ = p_cbranch_z %_
//>> p_cbranch_z %_
nir_push_if(nb, cond);
{
//>> BB3
@@ -1591,14 +1591,14 @@ BEGIN_TEST(isel.cf.empty_exec.terminate_then_loop)
//>> BB0
//>> s2: %_ = p_unit_test 0
//>> s2: %_ = p_cbranch_z %_
//>> p_cbranch_z %_
nir_push_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 0));
{
//>> BB1
//>> s2: %_ = p_unit_test 1
//>> p_discard_if %_
nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 1));
//>> s2: %_ = p_cbranch_z %0:exec rarely_taken
//>> p_cbranch_z %0:exec rarely_taken
//>> BB2
//! /* logical preds: BB1, / linear preds: BB1, / kind: uniform, loop-preheader, */
@@ -1649,7 +1649,7 @@ BEGIN_TEST(isel.cf.empty_exec.repair_ssa)
//>> BB1
//! /* logical preds: BB0, / linear preds: BB0, / kind: uniform, discard, */
//>> s2: %_ = p_unit_test 1
//>> s2: %_ = p_cbranch_z %0:exec rarely_taken
//>> p_cbranch_z %0:exec rarely_taken
nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 1));
nir_push_loop(nb);
@@ -1657,7 +1657,7 @@ BEGIN_TEST(isel.cf.empty_exec.repair_ssa)
//>> BB3
//! /* logical preds: BB2, BB20, / linear preds: BB2, BB22, / kind: uniform, loop-header, discard, */
//>> s2: %_ = p_unit_test 2
//>> s2: %_ = p_cbranch_z %0:exec rarely_taken
//>> p_cbranch_z %0:exec rarely_taken
nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 2));
nir_push_loop(nb);
@@ -1665,7 +1665,7 @@ BEGIN_TEST(isel.cf.empty_exec.repair_ssa)
//>> BB5
//! /* logical preds: BB4, / linear preds: BB4, / kind: uniform, loop-header, discard, */
//>> s2: %_ = p_unit_test 3
//>> s2: %_ = p_cbranch_z %0:exec rarely_taken
//>> p_cbranch_z %0:exec rarely_taken
nir_terminate_if(nb, nir_unit_test_divergent_amd(nb, 1, 1, .base = 3));
//>> BB6
@@ -1695,7 +1695,7 @@ BEGIN_TEST(isel.cf.empty_exec.repair_ssa)
//>> BB14
//! /* logical preds: BB12, / linear preds: BB12, BB13, / kind: uniform, */
//! s1: %sgpr3 = p_linear_phi %sgpr2, s1: undef
//>> s2: %_ = p_cbranch_z %0:exec rarely_taken
//>> p_cbranch_z %0:exec rarely_taken
//>> BB15
//! /* logical preds: BB14, / linear preds: BB14, / kind: uniform, */
+108 -115
View File
@@ -11,7 +11,6 @@ using namespace aco;
BEGIN_TEST(optimizer_postRA.vcmp)
PhysReg reg_v0(256);
PhysReg reg_s0(0);
PhysReg reg_s2(2);
PhysReg reg_s4(4);
//>> v1: %a:v[0] = p_startpgm
@@ -28,18 +27,17 @@ BEGIN_TEST(optimizer_postRA.vcmp)
/* Recognize when the result of VOPC goes to VCC, and use that for the branching then. */
//! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
//! s2: %e:s[2-3] = p_cbranch_z %b:vcc
//! p_unit_test 0, %e:s[2-3]
//! p_cbranch_z %b:vcc
//! p_unit_test 0
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
Operand(v_in, reg_v0));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp),
Operand(exec, bld.lm));
auto br =
bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(0, Operand(br, reg_s2));
bld.branch(aco_opcode::p_cbranch_z, bld.scc(sand.def(1).getTemp()));
writeout(0);
}
//; del b, e
//; del b
{
/* When VCC is overwritten in between, don't optimize. */
@@ -47,19 +45,18 @@ BEGIN_TEST(optimizer_postRA.vcmp)
//! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
//! s2: %f:vcc = s_mov_b64 0
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
//! p_unit_test 1, %e:s[2-3], %f:vcc
//! p_cbranch_z %d:scc
//! p_unit_test 1, %f:vcc
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
Operand(v_in, reg_v0));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp),
Operand(exec, bld.lm));
auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, vcc), Operand::zero());
auto br =
bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc));
bld.branch(aco_opcode::p_cbranch_z, bld.scc(sand.def(1).getTemp()));
writeout(1, Operand(ovrwr, vcc));
}
//; del b, c, d, e, f
//; del b, c, d, f
{
/* When part of VCC is overwritten in between, don't optimize. */
@@ -67,55 +64,52 @@ BEGIN_TEST(optimizer_postRA.vcmp)
//! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
//! s1: %f:vcc_hi = s_mov_b32 0
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
//! p_unit_test 1, %e:s[2-3], %f:vcc_hi
//! p_cbranch_z %d:scc
//! p_unit_test 1, %f:vcc_hi
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
Operand(v_in, reg_v0));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp),
Operand(exec, bld.lm));
auto ovrwr = bld.sop1(aco_opcode::s_mov_b32, bld.def(s1, vcc_hi), Operand::zero());
auto br =
bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc_hi));
bld.branch(aco_opcode::p_cbranch_z, bld.scc(sand.def(1).getTemp()));
writeout(1, Operand(ovrwr, vcc_hi));
}
//; del b, c, d, e, f
//; del b, c, d, f
{
/* When the result of VOPC goes to an SGPR pair other than VCC, don't optimize */
//! s2: %b:s[4-5] = v_cmp_eq_u32 0, %a:v[0]
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:s[4-5], %x:exec
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
//! p_unit_test 2, %e:s[2-3]
//! p_cbranch_z %d:scc
//! p_unit_test 2
auto vcmp = bld.vopc_e64(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, reg_s4), Operand::zero(),
Operand(v_in, reg_v0));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc),
Operand(vcmp, reg_s4), Operand(exec, bld.lm));
auto br =
bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(2, Operand(br, reg_s2));
bld.branch(aco_opcode::p_cbranch_z, bld.scc(sand.def(1).getTemp()));
writeout(2);
}
//; del b, c, d, e
//; del b, c, d
{
/* When the VCC isn't written by VOPC, don't optimize */
//! s2: %b:vcc, s1: %f:scc = s_or_b64 1, %0:s[4-5]
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
//! p_unit_test 2, %e:s[2-3]
//! p_cbranch_z %d:scc
//! p_unit_test 2
auto salu = bld.sop2(Builder::s_or, bld.def(bld.lm, vcc), bld.def(s1, scc), Operand::c32(1u),
Operand(reg_s4, bld.lm));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc),
Operand(salu, vcc), Operand(exec, bld.lm));
auto br =
bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(2, Operand(br, reg_s2));
bld.branch(aco_opcode::p_cbranch_z, bld.scc(sand.def(1).getTemp()));
writeout(2);
}
//; del b, c, d, e, f, x
//; del b, c, d, f, x
{
/* When EXEC is overwritten in between, don't optimize. */
@@ -123,19 +117,18 @@ BEGIN_TEST(optimizer_postRA.vcmp)
//! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
//! s2: %f:exec = s_mov_b64 42
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
//! p_unit_test 4, %e:s[2-3], %f:exec
//! p_cbranch_z %d:scc
//! p_unit_test 4, %f:exec
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
Operand(v_in, reg_v0));
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp),
Operand(exec, bld.lm));
auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, exec), Operand::c32(42u));
auto br =
bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
writeout(4, Operand(br, reg_s2), Operand(ovrwr, exec));
bld.branch(aco_opcode::p_cbranch_z, bld.scc(sand.def(1).getTemp()));
writeout(4, Operand(ovrwr, exec));
}
//; del b, c, d, e, f, x
//; del b, c, d, f, x
finish_optimizer_postRA_test();
END_TEST
@@ -164,73 +157,73 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_opt)
{
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s2: %f:vcc = p_cbranch_nz %e:scc
//! p_unit_test 0, %f:vcc
//! p_cbranch_nz %e:scc
//! p_unit_test 0
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
writeout(0, Operand(br, vcc));
bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp));
writeout(0);
}
//; del d, e, f
//; del d, e
{
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s2: %f:vcc = p_cbranch_z %e:scc
//! p_unit_test 1, %f:vcc
//! p_cbranch_z %e:scc
//! p_unit_test 1
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
writeout(1, Operand(br, vcc));
bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp));
writeout(1);
}
//; del d, e, f
//; del d, e
{
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s2: %f:vcc = p_cbranch_z %e:scc
//! p_unit_test 2, %f:vcc
//! p_cbranch_z %e:scc
//! p_unit_test 2
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
writeout(2, Operand(br, vcc));
bld.branch(aco_opcode::p_cbranch_nz, bld.scc(scmp));
writeout(2);
}
//; del d, e, f
//; del d, e
{
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s2: %f:vcc = p_cbranch_nz %e:scc
//! p_unit_test 3, %f:vcc
//! p_cbranch_nz %e:scc
//! p_unit_test 3
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
writeout(3, Operand(br, vcc));
bld.branch(aco_opcode::p_cbranch_nz, bld.scc(scmp));
writeout(3);
}
//; del d, e, f
//; del d, e
{
//! s2: %d:s[2-3], s1: %e:scc = s_and_b64 %y:s[4-5], 0x12345
//! s2: %f:vcc = p_cbranch_z %e:scc
//! p_unit_test 4, %f:vcc
//! p_cbranch_z %e:scc
//! p_unit_test 4
auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s2), bld.def(s1, scc), op_in_1,
Operand::c32(0x12345u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u64, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero(8));
auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
writeout(4, Operand(br, vcc));
bld.branch(aco_opcode::p_cbranch_nz, bld.scc(scmp));
writeout(4);
}
//; del d, e, f
//; del d, e
{
/* SCC is overwritten in between, don't optimize */
@@ -238,57 +231,57 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_opt)
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
//! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0
//! s2: %f:vcc = p_cbranch_z %g:scc
//! p_unit_test 5, %f:vcc, %h:s[3]
//! p_cbranch_z %g:scc
//! p_unit_test 5, %h:s[3]
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
Operand::c32(1u));
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3));
bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp));
writeout(5, Operand(ovrw, reg_s3));
}
//; del d, e, f, g, h, x
//; del d, e, g, h, x
{
/* SCC is overwritten in between, optimize by pulling down */
//! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
//! s2: %f:vcc = p_cbranch_z %g:scc
//! p_unit_test 5, %f:vcc, %h:s[3]
//! p_cbranch_z %g:scc
//! p_unit_test 5, %h:s[3]
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
Operand::c32(0x40018u));
auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
Operand::c32(1u));
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3));
bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp));
writeout(5, Operand(ovrw, reg_s3));
}
//; del d, e, f, g, h, x
//; del d, e, g, h, x
{
/* SCC is overwritten in between, optimize by pulling down */
//! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
//! s2: %d:s[8-9], s1: %e:scc = s_and_b64 %b:s[4-5], 0x40018
//! s2: %f:vcc = p_cbranch_z %g:scc
//! p_unit_test 5, %f:vcc, %h:s[3]
//! p_cbranch_z %g:scc
//! p_unit_test 5, %h:s[3]
auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s8), bld.def(s1, scc), op_in_1,
Operand::c32(0x40018u));
auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
Operand::c32(1u));
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u64, bld.def(s1, scc), Operand(salu, reg_s8),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3));
bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp));
writeout(5, Operand(ovrw, reg_s3));
}
//; del d, e, f, g, h, x
//; del d, e, g, h, x
{
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
@@ -541,7 +534,7 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf)
Temp dpp_tmp = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v12), a, dpp_row_mirror);
//! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB1, BB2
//! p_cbranch_nz BB1, BB2
emit_divergent_if_else(
program.get(), bld, e,
@@ -562,18 +555,18 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf)
writeout(10, Operand(result, reg_v12));
//! p_logical_end
//! s2: %0:vcc = p_branch BB3
//! p_branch BB3
/* --- linear then --- */
//! BB2
//! /* logical preds: / linear preds: BB0, / kind: */
//! s2: %0:vcc = p_branch BB3
//! p_branch BB3
/* --- invert --- */
//! BB3
//! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
//! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB4, BB5
//! p_cbranch_nz BB4, BB5
},
[&]() -> void
{
@@ -582,12 +575,12 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf)
//! /* logical preds: BB0, / linear preds: BB3, / kind: */
//! p_logical_start
//! p_logical_end
//! s2: %0:vcc = p_branch BB6
//! p_branch BB6
/* --- linear else --- */
//! BB5
//! /* logical preds: / linear preds: BB3, / kind: */
//! s2: %0:vcc = p_branch BB6
//! p_branch BB6
});
/* --- merge block --- */
@@ -623,7 +616,7 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf_overwritten)
Temp dpp_tmp = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v12), a, dpp_row_mirror);
//! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB1, BB2
//! p_cbranch_nz BB1, BB2
emit_divergent_if_else(
program.get(), bld, e,
@@ -642,18 +635,18 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf_overwritten)
d, 0, true);
//! p_logical_end
//! s2: %0:vcc = p_branch BB3
//! p_branch BB3
/* --- linear then --- */
//! BB2
//! /* logical preds: / linear preds: BB0, / kind: */
//! s2: %0:vcc = p_branch BB3
//! p_branch BB3
/* --- invert --- */
//! BB3
//! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
//! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB4, BB5
//! p_cbranch_nz BB4, BB5
},
[&]() -> void
{
@@ -662,12 +655,12 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf_overwritten)
//! /* logical preds: BB0, / linear preds: BB3, / kind: */
//! p_logical_start
//! p_logical_end
//! s2: %0:vcc = p_branch BB6
//! p_branch BB6
/* --- linear else --- */
//! BB5
//! /* logical preds: / linear preds: BB3, / kind: */
//! s2: %0:vcc = p_branch BB6
//! p_branch BB6
});
/* --- merge block --- */
@@ -703,7 +696,7 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf_linear_clobber)
Temp dpp_tmp = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v12), a, dpp_row_mirror);
//! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %c:s[0-1], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB1, BB2
//! p_cbranch_nz BB1, BB2
emit_divergent_if_else(
program.get(), bld, c,
@@ -722,18 +715,18 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf_linear_clobber)
writeout(0, Operand(clobber, a.physReg()));
//! p_logical_end
//! s2: %0:vcc = p_branch BB3
//! p_branch BB3
/* --- linear then --- */
//! BB2
//! /* logical preds: / linear preds: BB0, / kind: */
//! s2: %0:vcc = p_branch BB3
//! p_branch BB3
/* --- invert --- */
//! BB3
//! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
//! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB4, BB5
//! p_cbranch_nz BB4, BB5
},
[&]() -> void
{
@@ -749,12 +742,12 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf_linear_clobber)
writeout(1, Operand(result, reg_v12));
//! p_logical_end
//! s2: %0:vcc = p_branch BB6
//! p_branch BB6
/* --- linear else --- */
//! BB5
//! /* logical preds: / linear preds: BB3, / kind: */
//! s2: %0:vcc = p_branch BB6
//! p_branch BB6
});
/* --- merge block --- */
@@ -788,7 +781,7 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf)
Operand::c32(0x40018u));
//! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB1, BB2
//! p_cbranch_nz BB1, BB2
emit_divergent_if_else(
program.get(), bld, e,
@@ -803,18 +796,18 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf)
bld.mubuf(aco_opcode::buffer_store_dword, f, c, Operand::zero(), d, 0, true);
//! p_logical_end
//! s2: %0:vcc = p_branch BB3
//! p_branch BB3
/* --- linear then --- */
//! BB2
//! /* logical preds: / linear preds: BB0, / kind: */
//! s2: %0:vcc = p_branch BB3
//! p_branch BB3
/* --- invert --- */
//! BB3
//! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
//! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB4, BB5
//! p_cbranch_nz BB4, BB5
},
[&]() -> void
{
@@ -823,12 +816,12 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf)
//! /* logical preds: BB0, / linear preds: BB3, / kind: */
//! p_logical_start
//! p_logical_end
//! s2: %0:vcc = p_branch BB6
//! p_branch BB6
/* --- linear else --- */
//! BB5
//! /* logical preds: / linear preds: BB3, / kind: */
//! s2: %0:vcc = p_branch BB6
//! p_branch BB6
});
/* --- merge block --- */
@@ -837,12 +830,12 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf)
//! s2: %0:exec = p_parallelcopy %saved_exec:s[84-85]
//! s2: %tmp_salu:s[8-9], s1: %br_scc:scc = s_and_b64 %a:s[2-3], 0x40018
//! s2: %br_vcc:vcc = p_cbranch_z %br_scc:scc
//! p_unit_test 5, %br_vcc:vcc
//! p_cbranch_z %br_scc:scc
//! p_unit_test 5
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u64, bld.def(s1, scc), Operand(tmp_salu, reg_s8),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
writeout(5, Operand(br, vcc));
bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp));
writeout(5);
finish_optimizer_postRA_test();
END_TEST
@@ -874,7 +867,7 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf_partially_overwritten)
Operand::c32(0x40018u));
//! s2: %saved_exec:s[84-85], s1: %0:scc, s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB1, BB2
//! p_cbranch_nz BB1, BB2
emit_divergent_if_else(
program.get(), bld, e,
@@ -892,18 +885,18 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf_partially_overwritten)
bld.mubuf(aco_opcode::buffer_store_dword, g, c, Operand(s_addr, reg_s3), d, 0, true);
//! p_logical_end
//! s2: %0:vcc = p_branch BB3
//! p_branch BB3
/* --- linear then --- */
//! BB2
//! /* logical preds: / linear preds: BB0, / kind: */
//! s2: %0:vcc = p_branch BB3
//! p_branch BB3
/* --- invert --- */
//! BB3
//! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
//! s2: %0:exec, s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
//! s2: %0:vcc = p_cbranch_nz BB4, BB5
//! p_cbranch_nz BB4, BB5
},
[&]() -> void
{
@@ -912,12 +905,12 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf_partially_overwritten)
//! /* logical preds: BB0, / linear preds: BB3, / kind: */
//! p_logical_start
//! p_logical_end
//! s2: %0:vcc = p_branch BB6
//! p_branch BB6
/* --- linear else --- */
//! BB5
//! /* logical preds: / linear preds: BB3, / kind: */
//! s2: %0:vcc = p_branch BB6
//! p_branch BB6
});
/* --- merge block --- */
@@ -926,12 +919,12 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf_partially_overwritten)
//! s2: %0:exec = p_parallelcopy %saved_exec:s[84-85]
//! s1: %br_scc:scc = s_cmp_lg_u64 %tmp_salu:s[8-9], 0
//! s2: %br_vcc:vcc = p_cbranch_z %br_scc:scc
//! p_unit_test 5, %br_vcc:vcc
//! p_cbranch_z %br_scc:scc
//! p_unit_test 5
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u64, bld.def(s1, scc), Operand(tmp_salu, reg_s8),
Operand::zero());
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
writeout(5, Operand(br, vcc));
bld.branch(aco_opcode::p_cbranch_z, bld.scc(scmp));
writeout(5);
finish_optimizer_postRA_test();
END_TEST
+19 -19
View File
@@ -179,8 +179,8 @@ BEGIN_TEST(regalloc.branch_def_phis_at_merge_block)
program->blocks[0].kind &= ~block_kind_top_level;
//! s2: %_:s[2-3] = p_branch
bld.branch(aco_opcode::p_branch, bld.def(s2));
//! p_branch
bld.branch(aco_opcode::p_branch);
//! BB1
//! /* logical preds: / linear preds: BB0, / kind: uniform, */
@@ -204,8 +204,8 @@ BEGIN_TEST(regalloc.branch_def_phis_at_branch_block)
//! s2: %tmp:s[0-1] = p_unit_test
Temp tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(s2));
//! s2: %_:s[2-3] = p_cbranch_z %0:scc
bld.branch(aco_opcode::p_cbranch_z, bld.def(s2), Operand(scc, s1));
//! p_cbranch_z %0:scc
bld.branch(aco_opcode::p_cbranch_z, Operand(scc, s1));
//! BB1
//! /* logical preds: / linear preds: BB0, / kind: */
@@ -214,12 +214,12 @@ BEGIN_TEST(regalloc.branch_def_phis_at_branch_block)
//! p_unit_test %tmp:s[0-1]
bld.pseudo(aco_opcode::p_unit_test, tmp);
bld.branch(aco_opcode::p_branch, bld.def(s2));
bld.branch(aco_opcode::p_branch);
bld.reset(program->create_and_insert_block());
program->blocks[2].linear_preds.push_back(0);
bld.branch(aco_opcode::p_branch, bld.def(s2));
bld.branch(aco_opcode::p_branch);
bld.reset(program->create_and_insert_block());
program->blocks[3].linear_preds.push_back(1);
@@ -522,12 +522,12 @@ BEGIN_TEST(regalloc.linear_vgpr.compact_for_future_def)
Temp scc_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(s1, scc));
//! lv1: %ltmp2_2:v[29] = p_parallelcopy %ltmp2:v[28]
//~gfx8_cbranch! s2: %_:s[0-1] = p_cbranch_z %scc_tmp:scc
//~gfx8_branch! s2: %_:s[0-1] = p_branch
//~gfx8_cbranch! p_cbranch_z %scc_tmp:scc
//~gfx8_branch! p_branch
if (cbr)
bld.branch(aco_opcode::p_cbranch_z, bld.def(s2), bld.scc(scc_tmp));
bld.branch(aco_opcode::p_cbranch_z, bld.scc(scc_tmp));
else
bld.branch(aco_opcode::p_branch, bld.def(s2));
bld.branch(aco_opcode::p_branch);
//! BB1
//! /* logical preds: BB0, / linear preds: BB0, / kind: */
@@ -536,9 +536,9 @@ BEGIN_TEST(regalloc.linear_vgpr.compact_for_future_def)
program->blocks[1].logical_preds.push_back(0);
//! v29: %_:v[0-28] = p_unit_test
//! s2: %_:s[0-1] = p_branch
//! p_branch
bld.pseudo(aco_opcode::p_unit_test, bld.def(RegClass::get(RegType::vgpr, 29 * 4)));
bld.branch(aco_opcode::p_branch, bld.def(s2));
bld.branch(aco_opcode::p_branch);
//! BB2
//! /* logical preds: BB1, / linear preds: BB1, / kind: uniform, top-level, */
@@ -554,7 +554,7 @@ BEGIN_TEST(regalloc.linear_vgpr.compact_for_future_def)
finish_ra_test(ra_test_policy());
//~gfx8_cbranch>> lv1: %ltmp2_2:v[29] = p_parallelcopy %ltmp2:v[28] needs_scratch:1 scratch:s1
//~gfx8_cbranch>> lv1: %ltmp2_2:v[29] = p_parallelcopy %ltmp2:v[28] needs_scratch:1 scratch:s0
//~gfx8_branch>> lv1: %ltmp2_2:v[29] = p_parallelcopy %ltmp2:v[28] needs_scratch:1 scratch:s253
aco_ptr<Instruction>& parallelcopy = program->blocks[0].instructions[6];
aco_print_instr(program->gfx_level, parallelcopy.get(), output);
@@ -584,12 +584,12 @@ BEGIN_TEST(regalloc.linear_vgpr.compact_for_future_phis)
end_linear_vgpr(ltmp1);
//! lv1: %ltmp2_2:v[30] = p_parallelcopy %ltmp2:v[29]
//~gfx8_cbranch! s2: %_:s[0-1] = p_cbranch_z %_:scc
//~gfx8_branch! s2: %_:s[0-1] = p_branch
//~gfx8_cbranch! p_cbranch_z %_:scc
//~gfx8_branch! p_branch
if (cbr)
bld.branch(aco_opcode::p_cbranch_z, bld.def(s2), Operand(scc, s1));
bld.branch(aco_opcode::p_cbranch_z, Operand(scc, s1));
else
bld.branch(aco_opcode::p_branch, bld.def(s2));
bld.branch(aco_opcode::p_branch);
//! BB1
//! /* logical preds: BB0, / linear preds: BB0, / kind: */
@@ -597,8 +597,8 @@ BEGIN_TEST(regalloc.linear_vgpr.compact_for_future_phis)
program->blocks[1].linear_preds.push_back(0);
program->blocks[1].logical_preds.push_back(0);
//! s2: %_:s[0-1] = p_branch
bld.branch(aco_opcode::p_branch, bld.def(s2));
//! p_branch
bld.branch(aco_opcode::p_branch);
//! BB2
//! /* logical preds: BB1, / linear preds: BB1, / kind: uniform, top-level, */