r300: allow constant swizzles with inline constant
This will prevent a regression in the number of inlined constants in a later commit. Constructs like 4.000000 (0x48).w110 work just fine. There is a small behavioral change. We would previously allow positive and negative same-value constants to be produced, e.g., 4.000000 (0x48).w-w__ and this would be later split into some extra movs in the dataflow swizzle pass. We now explicitly check that the final swizzle is valid while inlining. So there is a minor decrease in inlined constants and in the total instructions. total lits in shared programs: 4328 -> 4194 (-3.10%) lits in affected programs: 554 -> 420 (-24.19%) total instructions in shared programs: 155488 -> 155361 (-0.08%) instructions in affected programs: 5707 -> 5580 (-2.23%) Additionally, a fix for pair translation is needed since the constant inlining can now produce swizzles like this: 4.000000 (0x48).w-0-0-_ so we have to teach pair translation to also ignore the sign for zero swizzle. Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com> Reviewed-by: Filip Gawin <filip@gawin.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17560>
This commit is contained in:
@@ -28,6 +28,7 @@
|
||||
#include "radeon_dataflow.h"
|
||||
#include "radeon_program.h"
|
||||
#include "radeon_program_constants.h"
|
||||
#include "radeon_swizzle.h"
|
||||
#include "util/u_bitcast.h"
|
||||
#include <stdio.h>
|
||||
|
||||
@@ -104,32 +105,22 @@ void rc_inline_literals(struct radeon_compiler *c, void *user)
|
||||
/* We aren't using rc_for_all_reads_src here, because presub
|
||||
* sources need to be handled differently. */
|
||||
for (src_idx = 0; src_idx < info->NumSrcRegs; src_idx++) {
|
||||
unsigned new_swizzle;
|
||||
unsigned use_literal = 0;
|
||||
unsigned negate_mask = 0;
|
||||
unsigned swz, chan;
|
||||
struct rc_src_register * src_reg =
|
||||
&inst->U.I.SrcReg[src_idx];
|
||||
swz = RC_SWIZZLE_UNUSED;
|
||||
if (src_reg->File != RC_FILE_CONSTANT) {
|
||||
struct rc_src_register src_reg = inst->U.I.SrcReg[src_idx];
|
||||
if (src_reg.File != RC_FILE_CONSTANT) {
|
||||
continue;
|
||||
}
|
||||
constant =
|
||||
&c->Program.Constants.Constants[src_reg->Index];
|
||||
&c->Program.Constants.Constants[src_reg.Index];
|
||||
if (constant->Type != RC_CONSTANT_IMMEDIATE) {
|
||||
continue;
|
||||
}
|
||||
new_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
|
||||
for (chan = 0; chan < 4; chan++) {
|
||||
unsigned char r300_float_tmp;
|
||||
swz = GET_SWZ(src_reg->Swizzle, chan);
|
||||
if (swz == RC_SWIZZLE_UNUSED) {
|
||||
continue;
|
||||
}
|
||||
/* Don't try to inline constant swizzle */
|
||||
swz = GET_SWZ(src_reg.Swizzle, chan);
|
||||
if (swz >= RC_SWIZZLE_ZERO) {
|
||||
use_literal = 0;
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
float_value = constant->u.Immediate[swz];
|
||||
ret = ieee_754_to_r300_float(float_value,
|
||||
@@ -140,7 +131,7 @@ void rc_inline_literals(struct radeon_compiler *c, void *user)
|
||||
break;
|
||||
}
|
||||
|
||||
if (ret == -1 && src_reg->Abs) {
|
||||
if (ret == -1 && src_reg.Abs) {
|
||||
use_literal = 0;
|
||||
break;
|
||||
}
|
||||
@@ -152,19 +143,18 @@ void rc_inline_literals(struct radeon_compiler *c, void *user)
|
||||
|
||||
/* Use RC_SWIZZLE_W for the inline constant, so
|
||||
* it will become one of the alpha sources. */
|
||||
SET_SWZ(new_swizzle, chan, RC_SWIZZLE_W);
|
||||
SET_SWZ(src_reg.Swizzle, chan, RC_SWIZZLE_W);
|
||||
if (ret == -1) {
|
||||
negate_mask |= (1 << chan);
|
||||
src_reg.Negate ^= (1 << chan);
|
||||
}
|
||||
}
|
||||
|
||||
if (!use_literal) {
|
||||
src_reg.File = RC_FILE_INLINE;
|
||||
src_reg.Index = r300_float;
|
||||
if (!use_literal || !c->SwizzleCaps->IsNative(inst->U.I.Opcode, src_reg)) {
|
||||
continue;
|
||||
}
|
||||
src_reg->File = RC_FILE_INLINE;
|
||||
src_reg->Index = r300_float;
|
||||
src_reg->Swizzle = new_swizzle;
|
||||
src_reg->Negate = src_reg->Negate ^ negate_mask;
|
||||
inst->U.I.SrcReg[src_idx] = src_reg;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -230,7 +230,23 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
|
||||
else if (swz == RC_SWIZZLE_W)
|
||||
srcalpha = 1;
|
||||
|
||||
if (swz < RC_SWIZZLE_UNUSED)
|
||||
/* We check for ZERO here as well because otherwise the zero
|
||||
* sign (which doesn't matter and we already ignore it previously
|
||||
* when checking for valid swizzle) could mess up the final negate sign.
|
||||
* Example problematic pattern where this would be produced is:
|
||||
* CONST[1] FLT32 { 0.0000, 0.0000, -4.0000, 0.0000}
|
||||
* ADD temp[0].xyz, const[0].xyz_, -const[1].z00_;
|
||||
*
|
||||
* after inline literals would become:
|
||||
* ADD temp[0].xyz, const[0].xyz_, 4.000000 (0x48).w-0-0-_;
|
||||
*
|
||||
* and after pair translate:
|
||||
* src0.xyz = const[0], src0.w = 4.000000 (0x48)
|
||||
* MAD temp[0].xyz, src0.xyz, src0.111, src0.w00
|
||||
*
|
||||
* Without the zero check there would be -src0.w00.
|
||||
*/
|
||||
if (swz < RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO)
|
||||
srcmask |= 1 << j;
|
||||
}
|
||||
source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
|
||||
|
||||
Reference in New Issue
Block a user