From d1c7244b6f2033d7602350c44233dd0eea8e5ea3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavel=20Ondra=C4=8Dka?= Date: Mon, 27 Jan 2025 15:46:41 +0100 Subject: [PATCH] r300: reswizzle some shadow texture calculations to use w channel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prevents shader-db regression, since NIR puts just about everything into x and the pair scheduling is not smart enough to fix it later, especially after presubtract happens. Together with the previous patches that move the shadow lowering to NIR this results in the following shader-db stats: RV530: total instructions in shared programs: 129598 -> 128697 (-0.70%) instructions in affected programs: 19001 -> 18100 (-4.74%) helped: 124 HURT: 9 total presub in shared programs: 8554 -> 8314 (-2.81%) presub in affected programs: 1099 -> 859 (-21.84%) helped: 17 HURT: 50 total temps in shared programs: 17514 -> 17000 (-2.93%) temps in affected programs: 1368 -> 854 (-37.57%) helped: 45 HURT: 18 total cycles in shared programs: 191286 -> 190593 (-0.36%) cycles in affected programs: 32369 -> 31676 (-2.14%) helped: 110 HURT: 29 RV410: total instructions in shared programs: 112805 -> 112618 (-0.17%) instructions in affected programs: 7089 -> 6902 (-2.64%) helped: 65 HURT: 21 total presub in shared programs: 3244 -> 3314 (2.16%) presub in affected programs: 228 -> 298 (30.70%) helped: 7 HURT: 47 total temps in shared programs: 18163 -> 18138 (-0.14%) temps in affected programs: 710 -> 685 (-3.52%) helped: 36 HURT: 19 total cycles in shared programs: 169530 -> 169369 (-0.09%) cycles in affected programs: 11304 -> 11143 (-1.42%) helped: 63 HURT: 27 The stats are a bit less awesome for RV410, since it can't run the heavy Unigine Tropics shaders where this helps the most due to the TEX indirection limits. 
Signed-off-by: Pavel Ondračka Reviewed-by: Filip Gawin Part-of: --- .../drivers/r300/compiler/r3xx_fragprog.c | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/src/gallium/drivers/r300/compiler/r3xx_fragprog.c b/src/gallium/drivers/r300/compiler/r3xx_fragprog.c index 266823ea702..128625cf81d 100644 --- a/src/gallium/drivers/r300/compiler/r3xx_fragprog.c +++ b/src/gallium/drivers/r300/compiler/r3xx_fragprog.c @@ -91,6 +91,50 @@ rc_convert_rgb_alpha(struct radeon_compiler *c, void *user) unsigned index = rc_find_free_temporary(c); rc_variable_change_dst(var, index, RC_MASK_W); } + + /* Here we attempt to convert some code specific for the shadow lowering to use the W + * channel. Most notably this prevents some unfavorable presubtract later. + * + * TODO: This should not be needed once we can properly vectorize the reference value + * comparisons. + */ + if (var->Inst->U.I.Opcode == RC_OPCODE_ADD && + var->Inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY && + var->Inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY && + var->Inst->U.I.DstReg.File == RC_FILE_TEMPORARY && + var->Inst->U.I.DstReg.WriteMask == RC_MASK_X) { + unsigned have_tex = false; + struct rc_variable *fsat = NULL; + for (unsigned int src = 0; src < 2; src++) { + struct rc_list *writer_list; + writer_list = rc_variable_list_get_writers(variables, RC_INSTRUCTION_NORMAL, + &var->Inst->U.I.SrcReg[src]); + if (!writer_list || !writer_list->Item) + continue; + + struct rc_variable *src_variable = (struct rc_variable *)writer_list->Item; + struct rc_instruction *inst = src_variable->Inst; + const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); + + /* Here we check that the two sources are the depth texture and saturated MOV/MUL */ + if (info->HasTexture && inst->U.I.DstReg.WriteMask == RC_MASK_X && !have_tex && !src_variable->Friend) { + have_tex = true; + } + if ((inst->U.I.Opcode == RC_OPCODE_MOV || inst->U.I.Opcode == RC_OPCODE_ADD) && !fsat && + inst->U.I.SaturateMode != 
RC_SATURATE_NONE && inst->U.I.DstReg.WriteMask == RC_MASK_X && + !src_variable->Friend) { + fsat = src_variable; + } + } + + /* Move the calculations to W. */ + if (fsat && have_tex) { + unsigned index = rc_find_free_temporary(c); + rc_variable_change_dst(var, index, RC_MASK_W); + index = rc_find_free_temporary(c); + rc_variable_change_dst(fsat, index, RC_MASK_W); + } + } } }