diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index d7e302ef619..ec4f6667f3e 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -1412,6 +1412,11 @@ opcode("prmt_nv", 0, tuint32, [0, 0, 0], [tuint32, tuint32, tuint32],
       dst |= ((uint32_t)x) << i * 8;
    }""")
 
+# Address arithmetic instruction (shift and add): dst = src0 + (src1 << src2).
+# Shift (src2) must be a constant; it is taken modulo the bit size.
+opcode("lea_nv", 0, tuint32, [0, 0, 0], [tuint32, tuint32, tuint32], False,
+       "", "src0 + (src1 << (src2 % bit_size))")
+
 # 24b multiply into 32b result (with sign extension)
 binop("imul24", tint32, _2src_commutative + associative,
       "(((int32_t)src0 << 8) >> 8) * (((int32_t)src1 << 8) >> 8)")
diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h
index 16c1b0bfbad..96c47713dd4 100644
--- a/src/compiler/nir/nir_search_helpers.h
+++ b/src/compiler/nir/nir_search_helpers.h
@@ -541,6 +541,24 @@ is_used_by_non_fsat(const nir_alu_instr *instr)
    return false;
 }
 
+static inline bool
+is_used_by_non_ldc_nv(const nir_alu_instr *instr)
+{
+   nir_foreach_use(src, &instr->def) { /* scan the uses of instr's def for anything other than ldc_nv */
+      const nir_instr *const user_instr = nir_src_parent_instr(src);
+
+      if (user_instr->type != nir_instr_type_intrinsic)
+         return true; /* used by a non-intrinsic instruction */
+
+      const nir_intrinsic_instr *const user_intrin = nir_instr_as_intrinsic(user_instr);
+
+      if (user_intrin->intrinsic != nir_intrinsic_ldc_nv)
+         return true; /* used by an intrinsic other than ldc_nv */
+   }
+
+   return false; /* no use other than ldc_nv was found */
+}
+
 static inline bool
 is_only_used_as_float_impl(const nir_alu_instr *instr, unsigned depth)
 {
diff --git a/src/nouveau/compiler/nak/builder.rs b/src/nouveau/compiler/nak/builder.rs
index d21e0920032..c9fb98a1be7 100644
--- a/src/nouveau/compiler/nak/builder.rs
+++ b/src/nouveau/compiler/nak/builder.rs
@@ -612,6 +612,24 @@ pub trait SSABuilder: Builder {
         dst
     }
 
+    fn lea(&mut self, a: Src, b: Src, shift: u8) -> SSARef { // pushes one OpLea and returns its freshly allocated GPR destination
+        let dst = self.alloc_ssa(RegFile::GPR, 1);
+        assert!(self.sm() >= 70); // this helper requires SM 70 or newer
+
+        self.push_op(OpLea {
+            dst: dst.into(),
+            overflow: Dst::None, // no overflow destination is requested
+            a: a,
+            b: b,
+            a_high: 0.into(),
+            dst_high: false,
+            shift: shift % 32, // wrap the shift amount into 0..=31
+            intermediate_mod: SrcMod::None,
+        });
+
+        dst
+    }
+
     fn lop2(&mut self, op: LogicOp2, x: Src, y: Src) -> SSARef {
         let dst = if x.is_predicate() {
             self.alloc_ssa(RegFile::Pred, 1)
diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs
index 80c4b8b767c..909a366d990 100644
--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@@ -1478,6 +1478,12 @@ impl<'a> ShaderFromNir<'a> {
                     b.shr(srcs[0], srcs[1], true)
                 }
             }
+            nir_op_lea_nv => {
+                let src_a = srcs[1]; // lea_nv src1 (the shifted operand) is passed as lea()'s `a`
+                let src_b = srcs[0]; // lea_nv src0 (the addend) is passed as lea()'s `b`
+                let shift = nir_srcs[2].comp_as_uint(0).unwrap() as u8; // panics if comp_as_uint(0) returns None (presumably a non-constant src2 -- confirm)
+                b.lea(src_a, src_b, shift)
+            }
             nir_op_isub => match alu.def.bit_size {
                 32 => b.iadd(srcs[0], srcs[1].ineg(), 0.into()),
                 64 => b.iadd64(srcs[0], srcs[1].ineg(), 0.into()),
diff --git a/src/nouveau/compiler/nak_nir_algebraic.py b/src/nouveau/compiler/nak_nir_algebraic.py
index 860979a9d19..4f8a4004917 100644
--- a/src/nouveau/compiler/nak_nir_algebraic.py
+++ b/src/nouveau/compiler/nak_nir_algebraic.py
@@ -27,6 +27,7 @@ import sys
 a = 'a'
 b = 'b'
 c = 'c'
+s = 's'  # pattern variable naming the shift amount in the lea_nv rule
 
 # common conditions to improve readability
 volta = 'nak->sm >= 70 && nak->sm < 75'
@@ -38,6 +39,9 @@ algebraic_lowering = [
     (('umin', 'a', 'b'), ('bcsel', ('ult', a, b), a, b), volta),
     (('umax', 'a', 'b'), ('bcsel', ('ult', a, b), b, a), volta),
     (('iadd', 'a@64', ('ineg', 'b@64')), ('isub', a, b)),
+
+    (('iadd(is_used_by_non_ldc_nv)', 'a@32', ('ishl', 'b@32', '#s@32')),  # 32-bit a + (b << s), iadd filtered by is_used_by_non_ldc_nv
+     ('lea_nv', a, b, s), 'nak->sm >= 70'),  # ...is replaced by lea_nv(a, b, s) on SM 70 and newer
 ]
 
 def main():