From 40422927dcb1249cdb1444557aca671387140428 Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Tue, 23 Apr 2024 15:15:16 +0200 Subject: [PATCH] nak: Pass has_mod to all form of src2 requiring it This was missing from the original changes and was causing HFMA2 to misbehave with an immediate value. Also fix inverted value passed around for cbuf and ureg forms. Fixes: bad23ddb484 ("nak: Add F16 and F16v2 sources") Signed-off-by: Mary Guillemard Part-of: --- src/nouveau/compiler/nak/encode_sm70.rs | 40 ++++++++++++++----------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/src/nouveau/compiler/nak/encode_sm70.rs b/src/nouveau/compiler/nak/encode_sm70.rs index 34a1dca56e9..3245f04ed03 100644 --- a/src/nouveau/compiler/nak/encode_sm70.rs +++ b/src/nouveau/compiler/nak/encode_sm70.rs @@ -109,6 +109,14 @@ impl ALUSrc { assert!(src.is_uniform()); ALUSrc::from_src_file(src, RegFile::UGPR) } + + pub fn has_src_mod(&self) -> bool { + match self { + ALUSrc::Reg(reg) | ALUSrc::UReg(reg) => reg.abs || reg.neg, + ALUSrc::CBuf(cb) => cb.abs || cb.neg, + _ => false, + } + } } struct SM70Instr { @@ -399,10 +407,14 @@ impl SM70Instr { self.set_dst(dst); } - // For opcodes like OpHAdd, both sources support full modifiers and swizzle, - // even when we use a form where the two sources go in src0 and src2. - // For OpHFma, however, which uses both src1 and src2, only src1 supports modifiers. - let src2_has_mod = !is_fp16_alu || matches!(src1, ALUSrc::None); + // Bits 74..76 are used both for the swizzle on src0 and for the source + // modifier for the register source of src1 and src2. When both are + // registers, it's used for src2. The hardware elects to always support + // a swizzle and not support source modifiers in that case. + let bit74_75_are_mod = !is_fp16_alu + || matches!(src1, ALUSrc::None) + || matches!(src2, ALUSrc::None); + debug_assert!(bit74_75_are_mod || !src0.has_src_mod()); self.set_alu_reg_src(24..32, 73, 72, 74..76, is_fp16_alu, true, &src0); @@ -414,7 +426,7 @@ impl SM70Instr { 75, 81..83, is_fp16_alu, - src2_has_mod, + bit74_75_are_mod, &src2, ); @@ -469,7 +481,7 @@ impl SM70Instr { 63, 60..62, is_fp16_alu, - src2_has_mod, + true, reg2, ); self.set_alu_reg_src( @@ -478,7 +490,7 @@ impl SM70Instr { 75, 81..83, is_fp16_alu, - true, + bit74_75_are_mod, &src1, ); 7_u8 // form @@ -491,29 +503,21 @@ impl SM70Instr { 75, 81..83, is_fp16_alu, - true, + bit74_75_are_mod, &src1, ); 2_u8 // form } ALUSrc::CBuf(cb) => { // TODO set_src_cx - self.set_alu_cb( - 38..59, - 62, - 63, - 60..62, - is_fp16_alu, - src2_has_mod, - cb, - ); + self.set_alu_cb(38..59, 62, 63, 60..62, is_fp16_alu, true, cb); self.set_alu_reg_src( 64..72, 74, 75, 81..83, is_fp16_alu, - true, + bit74_75_are_mod, &src1, ); 3_u8 // form