From b48a101d8f54ac835c4d988ea56216fd435bbd8a Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Sat, 16 Mar 2024 11:48:15 +0100
Subject: [PATCH] aco/builder: improve v_mul_imm for negative imm

Reviewed-by: Rhys Perry
Part-of:
---
 src/amd/compiler/aco_builder_h.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/amd/compiler/aco_builder_h.py b/src/amd/compiler/aco_builder_h.py
index f5106a29847..7b42d384f8a 100644
--- a/src/amd/compiler/aco_builder_h.py
+++ b/src/amd/compiler/aco_builder_h.py
@@ -414,9 +414,11 @@ public:
       return op.op.getTemp();
    }
 
-   Result v_mul_imm(Definition dst, Temp tmp, uint32_t imm, bool bits24=false)
+   Result v_mul_imm(Definition dst, Temp tmp, uint32_t imm, bool tmpu24=false, bool tmpi24=false)
    {
       assert(tmp.type() == RegType::vgpr);
+      tmpu24 &= imm <= 0xffffffu;
+      tmpi24 &= imm <= 0x7fffffu || imm >= 0xff800000u;
       bool has_lshl_add = program->gfx_level >= GFX9;
       /* v_mul_lo_u32 has 1.6x the latency of most VALU on GFX10 (8 vs 5 cycles),
        * compared to 4x the latency on <GFX10. */
[... the header and leading lines of the second hunk were lost in extraction; only its last context line survives ...]
       } else if (mul_cost > 2 && util_is_power_of_two_nonzero(imm + 1u)) {
@@ -467,7 +473,7 @@ public:
 
    Result v_mul24_imm(Definition dst, Temp tmp, uint32_t imm)
    {
-      return v_mul_imm(dst, tmp, imm, true);
+      return v_mul_imm(dst, tmp, imm & 0xffffffu, true);
    }
 
    Result copy(Definition dst, Op op) {