From 2a8c5ebc77f563c98ae4f4f1775128d4cb30c13f Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Tue, 19 Aug 2025 08:35:38 +0200 Subject: [PATCH] ir3: enable scalar predicates Enable the use of scalar predicates by marking predicate dsts as uniform when possible during instruction emission and in opt_predicates. Totals: Instrs: 48207402 -> 47967272 (-0.50%); split: -0.54%, +0.05% CodeSize: 101907026 -> 101768626 (-0.14%); split: -0.15%, +0.01% NOPs: 8386320 -> 8165410 (-2.63%); split: -2.88%, +0.25% MOVs: 1468853 -> 1470546 (+0.12%); split: -0.17%, +0.28% COVs: 823724 -> 823746 (+0.00%); split: -0.01%, +0.01% Full: 1716708 -> 1716767 (+0.00%); split: -0.00%, +0.01% (ss): 1113167 -> 1168194 (+4.94%); split: -0.15%, +5.09% (sy): 552317 -> 552288 (-0.01%); split: -0.10%, +0.09% (ss)-stall: 4013046 -> 4261336 (+6.19%); split: -0.11%, +6.30% (sy)-stall: 16741190 -> 16748983 (+0.05%); split: -0.17%, +0.22% STPs: 18895 -> 18901 (+0.03%); split: -0.02%, +0.05% LDPs: 23853 -> 23762 (-0.38%); split: -0.39%, +0.01% Preamble Instrs: 11506988 -> 11493425 (-0.12%); split: -0.12%, +0.01% Early Preamble: 121339 -> 121695 (+0.29%) Last helper: 11686328 -> 11628618 (-0.49%); split: -0.72%, +0.23% Cat0: 9241457 -> 9020508 (-2.39%); split: -2.62%, +0.22% Cat1: 2353411 -> 2354860 (+0.06%); split: -0.17%, +0.23% Cat2: 17468471 -> 17447932 (-0.12%); split: -0.12%, +0.00% Cat6: 515728 -> 515643 (-0.02%); split: -0.02%, +0.00% Cat7: 1637795 -> 1637789 (-0.00%); split: -0.05%, +0.05% Totals from 33275 (20.20% of 164705) affected shaders: Instrs: 30329487 -> 30089357 (-0.79%); split: -0.86%, +0.07% CodeSize: 59715922 -> 59577522 (-0.23%); split: -0.26%, +0.03% NOPs: 6265422 -> 6044512 (-3.53%); split: -3.86%, +0.33% MOVs: 1058197 -> 1059890 (+0.16%); split: -0.23%, +0.39% COVs: 427513 -> 427535 (+0.01%); split: -0.02%, +0.03% Full: 548495 -> 548554 (+0.01%); split: -0.01%, +0.02% (ss): 769340 -> 824367 (+7.15%); split: -0.21%, +7.36% (sy): 368276 -> 368247 (-0.01%); split: -0.14%, +0.13% (ss)-stall: 3076333 -> 3324623 (+8.07%); split: -0.15%, +8.22% (sy)-stall: 10740547 -> 10748340 (+0.07%); split: -0.27%, +0.34% STPs: 12872 -> 12878 (+0.05%); split: -0.02%, +0.07% LDPs: 20808 -> 20717 (-0.44%); split: -0.45%, +0.01% Preamble Instrs: 6354490 -> 6340927 (-0.21%); split: -0.22%, +0.01% Early Preamble: 15233 -> 15589 (+2.34%) Last helper: 8106631 -> 8048921 (-0.71%); split: -1.04%, +0.32% Cat0: 6888653 -> 6667704 (-3.21%); split: -3.51%, +0.30% Cat1: 1541452 -> 1542901 (+0.09%); split: -0.25%, +0.35% Cat2: 10963398 -> 10942859 (-0.19%); split: -0.19%, +0.00% Cat6: 265945 -> 265860 (-0.03%); split: -0.03%, +0.00% Cat7: 1164800 -> 1164794 (-0.00%); split: -0.07%, +0.07% Signed-off-by: Job Noorman Part-of: --- src/freedreno/ir3/ir3_context.c | 13 ++++++++++++- src/freedreno/ir3/ir3_opt_predicates.c | 27 ++++++++++++++++++++------ 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c index 50d35386fd8..151eed319da 100644 --- a/src/freedreno/ir3/ir3_context.c +++ b/src/freedreno/ir3/ir3_context.c @@ -437,7 +437,18 @@ ir3_get_predicate(struct ir3_context *ctx, struct ir3_instruction *src) /* condition always goes in predicate register: */ cond->dsts[0]->flags |= IR3_REG_PREDICATE; - cond->dsts[0]->flags &= ~IR3_REG_SHARED; + + /* The builders will mark the dst as shared when both srcs are shared. + * Predicates can't be shared but do support the scalar ALU when marked as + * uniform. + */ + if (cond->dsts[0]->flags & IR3_REG_SHARED) { + cond->dsts[0]->flags &= ~IR3_REG_SHARED; + + if (ctx->compiler->has_scalar_predicates) { + cond->dsts[0]->flags |= IR3_REG_UNIFORM; + } + } _mesa_hash_table_insert(ctx->predicate_conversions, src, cond); return cond; diff --git a/src/freedreno/ir3/ir3_opt_predicates.c b/src/freedreno/ir3/ir3_opt_predicates.c index 976a3d19374..c578ec35f57 100644 --- a/src/freedreno/ir3/ir3_opt_predicates.c +++ b/src/freedreno/ir3/ir3_opt_predicates.c @@ -31,16 +31,23 @@ struct opt_predicates_ctx { }; static bool -is_shared_or_const(struct ir3_register *reg) +cat2_all_srcs_have_flag(struct ir3_instruction *instr, unsigned flags) { - return reg->flags & (IR3_REG_CONST | IR3_REG_SHARED); + return (instr->srcs[0]->flags & flags) && + (instr->srcs_count == 1 || (instr->srcs[1]->flags & flags)); } static bool cat2_needs_scalar_alu(struct ir3_instruction *instr) { - return is_shared_or_const(instr->srcs[0]) && - (instr->srcs_count == 1 || is_shared_or_const(instr->srcs[1])); + return cat2_all_srcs_have_flag(instr, IR3_REG_CONST | IR3_REG_SHARED); +} + +static bool +cat2_may_use_scalar_alu(struct ir3_instruction *instr) +{ + return cat2_all_srcs_have_flag( + instr, IR3_REG_CONST | IR3_REG_SHARED | IR3_REG_IMMED); } static struct ir3_instruction * @@ -58,6 +65,12 @@ clone_with_predicate_dst(struct opt_predicates_ctx *ctx, ir3_instr_move_after(clone, instr); clone->dsts[0]->flags |= IR3_REG_PREDICATE; clone->dsts[0]->flags &= ~(IR3_REG_HALF | IR3_REG_SHARED); + + if (ctx->ir->compiler->has_scalar_predicates && opc_cat(instr->opc) == 2 && + cat2_may_use_scalar_alu(instr)) { + clone->dsts[0]->flags |= IR3_REG_UNIFORM; + } + _mesa_hash_table_insert(ctx->predicate_clones, instr, clone); return clone; } @@ -70,14 +83,16 @@ can_write_predicate(struct opt_predicates_ctx *ctx, case OPC_CMPS_S: case OPC_CMPS_U: case OPC_CMPS_F: - return !cat2_needs_scalar_alu(instr); + return !cat2_needs_scalar_alu(instr) || + ctx->ir->compiler->has_scalar_predicates; case OPC_AND_B: case OPC_OR_B: case OPC_NOT_B: case OPC_XOR_B: case OPC_GETBIT_B: return ctx->ir->compiler->bitops_can_write_predicates && - !cat2_needs_scalar_alu(instr); + (!cat2_needs_scalar_alu(instr) || + ctx->ir->compiler->has_scalar_predicates); default: return false; }