r300: move the ARL merging pass up in the opt loop
Specifically after the first copy propagate run but before the second one. Removal of ARLs will enable the copy propagate pass to be more aggressive, as it is very careful in such cases. shader-db RV530: total instructions in shared programs: 131861 -> 131503 (-0.27%) instructions in affected programs: 23949 -> 23591 (-1.49%) helped: 199 HURT: 15 total temps in shared programs: 16997 -> 16903 (-0.55%) temps in affected programs: 767 -> 673 (-12.26%) helped: 69 HURT: 9 RV370: total instructions in shared programs: 82360 -> 82027 (-0.40%) instructions in affected programs: 19516 -> 19183 (-1.71%) helped: 183 HURT: 15 total temps in shared programs: 12370 -> 12262 (-0.87%) temps in affected programs: 664 -> 556 (-16.27%) helped: 73 HURT: 0 The hurt programs are due to some constant load being copy propagated, which leads to a bad interaction with the source conflict resolve pass later. v2: add the missing shader type initialization to the tests. Previously we were checking for has_omod, which also practically means we have a fragment shader; however, it's less readable. Reviewed-by: Emma Anholt <emma@anholt.net> Reviewed-by: Filip Gawin <filip.gawin@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23560>
This commit is contained in:
committed by
Marge Bot
parent
453201fe74
commit
f82574fb2c
@@ -1374,6 +1374,21 @@ static void merge_ARL(struct radeon_compiler * c, struct rc_instruction * inst)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply various optimizations specific to the A0 adress register loads.
|
||||
*/
|
||||
static void optimize_A0_loads(struct radeon_compiler * c) {
|
||||
struct rc_instruction * inst = c->Program.Instructions.Next;
|
||||
|
||||
while (inst != &c->Program.Instructions) {
|
||||
struct rc_instruction * cur = inst;
|
||||
inst = inst->Next;
|
||||
if (cur->U.I.Opcode == RC_OPCODE_ARL) {
|
||||
merge_ARL(c, cur);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void rc_optimize(struct radeon_compiler * c, void *user)
|
||||
{
|
||||
struct rc_instruction * inst = c->Program.Instructions.Next;
|
||||
@@ -1393,6 +1408,10 @@ void rc_optimize(struct radeon_compiler * c, void *user)
|
||||
}
|
||||
}
|
||||
|
||||
if (c->type == RC_VERTEX_PROGRAM) {
|
||||
optimize_A0_loads(c);
|
||||
}
|
||||
|
||||
/* Merge MOVs to same source in different channels using the constant
|
||||
* swizzle.
|
||||
*/
|
||||
@@ -1419,6 +1438,10 @@ void rc_optimize(struct radeon_compiler * c, void *user)
|
||||
}
|
||||
}
|
||||
|
||||
if (c->type != RC_FRAGMENT_PROGRAM) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Presubtract operations. */
|
||||
inst = c->Program.Instructions.Next;
|
||||
while(inst != &c->Program.Instructions) {
|
||||
@@ -1427,19 +1450,7 @@ void rc_optimize(struct radeon_compiler * c, void *user)
|
||||
peephole(c, cur);
|
||||
}
|
||||
|
||||
|
||||
if (!c->has_omod) {
|
||||
inst = c->Program.Instructions.Next;
|
||||
while (inst != &c->Program.Instructions) {
|
||||
struct rc_instruction * cur = inst;
|
||||
inst = inst->Next;
|
||||
if (cur->U.I.Opcode == RC_OPCODE_ARL) {
|
||||
merge_ARL(c, cur);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* Output modifiers. */
|
||||
inst = c->Program.Instructions.Next;
|
||||
struct rc_list * var_list = NULL;
|
||||
while(inst != &c->Program.Instructions) {
|
||||
|
||||
@@ -513,6 +513,7 @@ void init_compiler(
|
||||
rc_init_regalloc_state(rs, program_type);
|
||||
rc_init(c, rs);
|
||||
|
||||
c->type = program_type;
|
||||
c->is_r500 = is_r500;
|
||||
c->max_temp_regs = is_r500 ? 128 : (is_r400 ? 64 : 32);
|
||||
c->max_constants = is_r500 ? 256 : 32;
|
||||
|
||||
Reference in New Issue
Block a user