r300: reswizzle some shadow texture calculations to use w channel
Prevents a shader-db regression, since NIR puts just about everything into the x channel and the pair scheduling is not smart enough to fix it later, especially after presubtract happens. Together with the previous patches that move the shadow lowering to NIR, this results in the following shader-db stats: RV530: total instructions in shared programs: 129598 -> 128697 (-0.70%) instructions in affected programs: 19001 -> 18100 (-4.74%) helped: 124 HURT: 9 total presub in shared programs: 8554 -> 8314 (-2.81%) presub in affected programs: 1099 -> 859 (-21.84%) helped: 17 HURT: 50 total temps in shared programs: 17514 -> 17000 (-2.93%) temps in affected programs: 1368 -> 854 (-37.57%) helped: 45 HURT: 18 total cycles in shared programs: 191286 -> 190593 (-0.36%) cycles in affected programs: 32369 -> 31676 (-2.14%) helped: 110 HURT: 29 RV410: total instructions in shared programs: 112805 -> 112618 (-0.17%) instructions in affected programs: 7089 -> 6902 (-2.64%) helped: 65 HURT: 21 total presub in shared programs: 3244 -> 3314 (2.16%) presub in affected programs: 228 -> 298 (30.70%) helped: 7 HURT: 47 total temps in shared programs: 18163 -> 18138 (-0.14%) temps in affected programs: 710 -> 685 (-3.52%) helped: 36 HURT: 19 total cycles in shared programs: 169530 -> 169369 (-0.09%) cycles in affected programs: 11304 -> 11143 (-1.42%) helped: 63 HURT: 27 The stats are a bit less awesome for RV410, since it can't run the heavy Unigine Tropics shaders, where this helps the most, due to the TEX indirection limits. Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com> Reviewed-by: Filip Gawin <None> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33066>
This commit is contained in:
committed by
Marge Bot
parent
296da387c0
commit
d1c7244b6f
@@ -91,6 +91,50 @@ rc_convert_rgb_alpha(struct radeon_compiler *c, void *user)
|
||||
unsigned index = rc_find_free_temporary(c);
|
||||
rc_variable_change_dst(var, index, RC_MASK_W);
|
||||
}
|
||||
|
||||
/* Here we attempt to convert some code specific for the shadow lowering to use the W
|
||||
* channel. Most notably this prevents some unfavorable presubtract later.
|
||||
*
|
||||
* TODO: This should not be needed once we can properly vectorize the reference value
|
||||
* comparisons.
|
||||
*/
|
||||
if (var->Inst->U.I.Opcode == RC_OPCODE_ADD &&
|
||||
var->Inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY &&
|
||||
var->Inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY &&
|
||||
var->Inst->U.I.DstReg.File == RC_FILE_TEMPORARY &&
|
||||
var->Inst->U.I.DstReg.WriteMask == RC_MASK_X) {
|
||||
unsigned have_tex = false;
|
||||
struct rc_variable *fsat = NULL;
|
||||
for (unsigned int src = 0; src < 2; src++) {
|
||||
struct rc_list *writer_list;
|
||||
writer_list = rc_variable_list_get_writers(variables, RC_INSTRUCTION_NORMAL,
|
||||
&var->Inst->U.I.SrcReg[src]);
|
||||
if (!writer_list || !writer_list->Item)
|
||||
continue;
|
||||
|
||||
struct rc_variable *src_variable = (struct rc_variable *)writer_list->Item;
|
||||
struct rc_instruction *inst = src_variable->Inst;
|
||||
const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
|
||||
/* Here we check that the two sources are the depth texture and saturated MOV/MUL */
|
||||
if (info->HasTexture && inst->U.I.DstReg.WriteMask == RC_MASK_X && !have_tex && !src_variable->Friend) {
|
||||
have_tex = true;
|
||||
}
|
||||
if ((inst->U.I.Opcode == RC_OPCODE_MOV || inst->U.I.Opcode == RC_OPCODE_ADD) && !fsat &&
|
||||
inst->U.I.SaturateMode != RC_SATURATE_NONE && inst->U.I.DstReg.WriteMask == RC_MASK_X &&
|
||||
!src_variable->Friend) {
|
||||
fsat = src_variable;
|
||||
}
|
||||
}
|
||||
|
||||
/* Move the calculations to W. */
|
||||
if (fsat && have_tex) {
|
||||
unsigned index = rc_find_free_temporary(c);
|
||||
rc_variable_change_dst(var, index, RC_MASK_W);
|
||||
index = rc_find_free_temporary(c);
|
||||
rc_variable_change_dst(fsat, index, RC_MASK_W);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user