nak/sm70: Add support for encoding uniform ALU ops

This requires a pretty significant rework of encode_alu_base().  In
particular, we can't know the register file that's going to be used
until we get into encode_alu_base() so ALUSrc::from_src() can't handle
Zero itself.  Instead, we defer to a new ALUSrc::with_op_uniformity()
helper which does a postprocess step.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29591>
This commit is contained in:
Faith Ekstrand
2024-05-28 15:30:30 -05:00
committed by Marge Bot
parent 8d2d2db6a0
commit 21b1eb8da7
+123 -23
View File
@@ -60,7 +60,7 @@ fn dst_is_bar(dst: Dst) -> bool {
}
impl ALUSrc {
fn from_src_file(src: Option<&Src>, file: RegFile) -> ALUSrc {
fn from_src(src: Option<&Src>, op_is_uniform: bool) -> ALUSrc {
let Some(src) = src else {
return ALUSrc::None;
};
@@ -68,22 +68,33 @@ impl ALUSrc {
match src.src_ref {
SrcRef::Zero | SrcRef::Reg(_) => {
let reg = match src.src_ref {
SrcRef::Zero => RegRef::zero(file, 1),
SrcRef::Zero => {
let file = if op_is_uniform {
RegFile::UGPR
} else {
RegFile::GPR
};
RegRef::zero(file, 1)
}
SrcRef::Reg(reg) => reg,
_ => panic!("Invalid source ref"),
};
assert!(reg.comps() <= 2);
assert!(reg.file() == file);
let alu_ref = ALURegRef {
reg: reg,
abs: src_mod_has_abs(src.src_mod),
neg: src_mod_has_neg(src.src_mod),
swizzle: src.src_swizzle,
};
match reg.file() {
RegFile::GPR => ALUSrc::Reg(alu_ref),
RegFile::UGPR => ALUSrc::UReg(alu_ref),
_ => panic!("Invalid ALU register file"),
if op_is_uniform {
assert!(reg.file() == RegFile::UGPR);
ALUSrc::Reg(alu_ref)
} else {
match reg.file() {
RegFile::GPR => ALUSrc::Reg(alu_ref),
RegFile::UGPR => ALUSrc::UReg(alu_ref),
_ => panic!("Invalid ALU register file"),
}
}
}
SrcRef::Imm32(i) => {
@@ -104,10 +115,6 @@ impl ALUSrc {
}
}
pub fn from_src(src: Option<&Src>) -> ALUSrc {
ALUSrc::from_src_file(src, RegFile::GPR)
}
pub fn has_src_mod(&self) -> bool {
match self {
ALUSrc::Reg(reg) | ALUSrc::UReg(reg) => reg.abs || reg.neg,
@@ -298,11 +305,16 @@ impl SM70Instr {
abs_bit: usize,
neg_bit: usize,
swizzle_range: Range<usize>,
file: RegFile,
is_fp16_alu: bool,
has_mod: bool,
reg: &ALURegRef,
) {
self.set_reg(range, reg.reg);
match file {
RegFile::GPR => self.set_reg(range, reg.reg),
RegFile::UGPR => self.set_ureg(range, reg.reg),
_ => panic!("Invalid ALU src register file"),
}
if has_mod {
self.set_bit(abs_bit, reg.abs);
@@ -318,18 +330,24 @@ impl SM70Instr {
}
}
fn encode_alu_src0(&mut self, src: &ALUSrc, is_fp16_alu: bool) {
fn encode_alu_src0(
&mut self,
src: &ALUSrc,
file: RegFile,
is_fp16_alu: bool,
) {
let reg = match src {
ALUSrc::None => return,
ALUSrc::Reg(reg) => reg,
_ => panic!("Invalid ALU src"),
};
self.set_alu_reg(24..32, 73, 72, 74..76, is_fp16_alu, true, reg);
self.set_alu_reg(24..32, 73, 72, 74..76, file, is_fp16_alu, true, reg);
}
fn encode_alu_src2(
&mut self,
src: &ALUSrc,
file: RegFile,
is_fp16_alu: bool,
bit74_75_are_mod: bool,
) {
@@ -343,6 +361,7 @@ impl SM70Instr {
74,
75,
81..83,
file,
is_fp16_alu,
bit74_75_are_mod,
reg,
@@ -350,7 +369,16 @@ impl SM70Instr {
}
fn encode_alu_reg(&mut self, reg: &ALURegRef, is_fp16_alu: bool) {
self.set_alu_reg(32..40, 62, 63, 60..62, is_fp16_alu, true, reg);
self.set_alu_reg(
32..40,
62,
63,
60..62,
RegFile::GPR,
is_fp16_alu,
true,
reg,
);
}
fn encode_alu_ureg(&mut self, reg: &ALURegRef, is_fp16_alu: bool) {
@@ -363,6 +391,8 @@ impl SM70Instr {
} else {
assert!(reg.swizzle == SrcSwizzle::None);
}
self.set_bit(91, true);
}
fn encode_alu_imm(&mut self, imm: &u32) {
@@ -394,9 +424,9 @@ impl SM70Instr {
self.set_dst(*dst);
}
let src0 = ALUSrc::from_src(src0);
let src1 = ALUSrc::from_src(src1);
let src2 = ALUSrc::from_src(src2);
let src0 = ALUSrc::from_src(src0, false);
let src1 = ALUSrc::from_src(src1, false);
let src2 = ALUSrc::from_src(src2, false);
// Bits 74..76 are used both for the swizzle on src0 and for the source
// modifier for the register source of src1 and src2. When both are
@@ -407,11 +437,16 @@ impl SM70Instr {
|| matches!(src2, ALUSrc::None);
debug_assert!(bit74_75_are_mod || !src0.has_src_mod());
self.encode_alu_src0(&src0, is_fp16_alu);
self.encode_alu_src0(&src0, RegFile::GPR, is_fp16_alu);
let form = match &src2 {
ALUSrc::None | ALUSrc::Reg(_) => {
self.encode_alu_src2(&src2, is_fp16_alu, bit74_75_are_mod);
self.encode_alu_src2(
&src2,
RegFile::GPR,
is_fp16_alu,
bit74_75_are_mod,
);
match &src1 {
ALUSrc::None => 1_u8, // form
ALUSrc::Reg(reg1) => {
@@ -434,18 +469,33 @@ impl SM70Instr {
}
ALUSrc::UReg(reg2) => {
self.encode_alu_ureg(reg2, is_fp16_alu);
self.encode_alu_src2(&src1, is_fp16_alu, bit74_75_are_mod);
self.encode_alu_src2(
&src1,
RegFile::GPR,
is_fp16_alu,
bit74_75_are_mod,
);
7_u8 // form
}
ALUSrc::Imm32(imm2) => {
self.encode_alu_imm(imm2);
self.encode_alu_src2(&src1, is_fp16_alu, bit74_75_are_mod);
self.encode_alu_src2(
&src1,
RegFile::GPR,
is_fp16_alu,
bit74_75_are_mod,
);
2_u8 // form
}
ALUSrc::CBuf(cb2) => {
// TODO set_src_cx
self.encode_alu_cb(cb2, is_fp16_alu);
self.encode_alu_src2(&src1, is_fp16_alu, bit74_75_are_mod);
self.encode_alu_src2(
&src1,
RegFile::GPR,
is_fp16_alu,
bit74_75_are_mod,
);
3_u8 // form
}
};
@@ -476,6 +526,56 @@ impl SM70Instr {
self.encode_alu_base(opcode, dst, src0, src1, src2, true);
}
/// Encodes a uniform ALU instruction.
///
/// The optional destination is written with `set_udst()` and all three
/// sources are converted with `ALUSrc::from_src(_, true)`.  In that mode a
/// register source is asserted to live in the UGPR file and is reported as
/// `ALUSrc::Reg`, which is why an `ALUSrc::UReg` showing up here is treated
/// as a bug.  Constant-buffer sources are rejected as well.
///
/// Bit 91 is set unconditionally, `opcode` lands in bits 0..9 and the form
/// selected from the kinds of `src1`/`src2` lands in bits 9..12.
///
/// # Panics
///
/// Panics if `src1` or `src2` resolves to `ALUSrc::UReg` or `ALUSrc::CBuf`,
/// and wherever the helpers it calls panic.
fn encode_ualu(
    &mut self,
    opcode: u16,
    dst: Option<&Dst>,
    src0: Option<&Src>,
    src1: Option<&Src>,
    src2: Option<&Src>,
) {
    if let Some(udst) = dst {
        self.set_udst(*udst);
    }

    // Convert in source order; `true` marks the op as uniform.
    let [alu0, alu1, alu2] =
        [src0, src1, src2].map(|s| ALUSrc::from_src(s, true));

    // Every uniform ALU encoding has bit 91 set.
    self.set_bit(91, true);

    self.encode_alu_src0(&alu0, RegFile::UGPR, false);

    let form: u8 = match &alu2 {
        ALUSrc::UReg(_) => panic!("UALU never has UReg"),
        ALUSrc::CBuf(_) => panic!("UALU does not support cbufs"),
        ALUSrc::Imm32(imm) => {
            // The immediate is encoded first, so src1 is passed through
            // the src2 encoder instead.
            self.encode_alu_imm(imm);
            self.encode_alu_src2(&alu1, RegFile::UGPR, false, true);
            2
        }
        ALUSrc::None | ALUSrc::Reg(_) => {
            self.encode_alu_src2(&alu2, RegFile::UGPR, false, true);
            match &alu1 {
                ALUSrc::UReg(_) => panic!("UALU never has UReg"),
                ALUSrc::CBuf(_) => panic!("UALU does not support cbufs"),
                ALUSrc::Imm32(imm) => {
                    self.encode_alu_imm(imm);
                    4
                }
                ALUSrc::Reg(ureg) => {
                    self.encode_alu_ureg(ureg, false);
                    1
                }
                ALUSrc::None => 1,
            }
        }
    };

    self.set_field(0..9, opcode);
    self.set_field(9..12, form);
}
fn set_instr_deps(&mut self, deps: &InstrDeps) {
self.set_field(105..109, deps.delay);
self.set_bit(109, deps.yld);