nak: Simplify I/O gathering

This adds mark_attrs_(read|written)() helpers to VtgIoInfo which take a
range of attribute addresses and mark the range as needed.  It adds a
similar mark_attr_read() helper to FragmentIoInfo which only marks a
single address and takes a PixelImap.  This gets us down to only needing
to duplicate the address-range if-ladder twice.  For VTG I/O, having it
take ranges will be more ergonomic when it comes time to handle
non-constant I/O offsets.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24998>
This commit is contained in:
Faith Ekstrand
2023-09-26 15:21:49 -05:00
committed by Marge Bot
parent 32910d3016
commit a4ff5a9fc6
2 changed files with 77 additions and 120 deletions
+23 -120
View File
@@ -1370,28 +1370,13 @@ impl<'a> ShaderFromNir<'a> {
+ u16::try_from(intrin.component()).unwrap() * 4;
for c in 0..comps {
let attribute_id = addr + 4 * u16::from(c);
let c_addr = addr + 4 * u16::from(c);
if attribute_id < 0x080 {
io.sysvals_in.ab |= 1 << (attribute_id / 4);
} else if attribute_id >= 0x080
&& attribute_id < 0x280
{
let user_attribute_index =
(attribute_id - 0x080) as usize / 4;
io.attr_in[user_attribute_index] =
PixelImap::Constant;
} else if attribute_id >= 0x2c0
&& attribute_id < 0x300
{
io.sysvals_in.c |=
1 << ((attribute_id - 0x2c0) / 4);
}
io.mark_attr_read(c_addr, PixelImap::Constant);
b.push_op(OpIpa {
dst: dst[usize::from(c)].into(),
addr: attribute_id,
addr: c_addr,
freq: InterpFreq::Constant,
loc: InterpLoc::Default,
offset: SrcRef::Zero.into(),
@@ -1402,6 +1387,9 @@ impl<'a> ShaderFromNir<'a> {
let addr = u16::try_from(intrin.base()).unwrap()
+ u16::try_from(intrin.component()).unwrap() * 4;
let addr_range = addr..(addr + 4 * u16::from(comps));
io.mark_attrs_read(addr_range);
let access = AttrAccess {
addr: addr,
comps: comps,
@@ -1410,29 +1398,6 @@ impl<'a> ShaderFromNir<'a> {
flags: 0,
};
let attribute_base_index = access.addr / 4;
for attribute_index in attribute_base_index
..attribute_base_index + access.comps as u16
{
let attribute_id = attribute_index * 4;
if attribute_id < 0x080 {
io.sysvals_in.ab |= 1 << (attribute_id / 4);
} else if attribute_id >= 0x080
&& attribute_id < 0x280
{
BitMutView::new(&mut io.attr_in).set_bit(
(attribute_id as usize - 0x080) / 4,
true,
);
} else if attribute_id >= 0x2c0
&& attribute_id < 0x300
{
io.sysvals_in.c |=
1 << ((attribute_id - 0x2c0) / 4);
}
}
b.push_op(OpALd {
dst: dst.into(),
vtx: vtx,
@@ -1487,65 +1452,21 @@ impl<'a> ShaderFromNir<'a> {
};
assert!(intrin.def.bit_size() == 32);
let dst =
b.alloc_ssa(RegFile::GPR, intrin.def.num_components());
let comps = intrin.def.num_components();
let dst = b.alloc_ssa(RegFile::GPR, comps);
for c in 0..intrin.def.num_components() {
let attribute_id = addr + 4 * u16::from(c);
let ShaderIoInfo::Fragment(io) = &mut self.info.io else {
panic!("input interpolation is only allowed in fragment shaders");
};
if attribute_id < 0x080 {
match &mut self.info.io {
ShaderIoInfo::None => {
panic!("Stage does not support load_interpolated_input")
}
ShaderIoInfo::Vtg(VtgIoInfo {
sysvals_in, ..
})
| ShaderIoInfo::Fragment(FragmentIoInfo {
sysvals_in,
..
}) => {
sysvals_in.ab |= 1 << (attribute_id / 4);
}
}
} else if attribute_id >= 0x080 && attribute_id < 0x280 {
let user_attribute_index =
(attribute_id - 0x080) as usize / 4;
for c in 0..comps {
let c_addr = addr + 4 * u16::from(c);
match &mut self.info.io {
ShaderIoInfo::None => {
panic!("Stage does not support load_interpolated_input")
}
ShaderIoInfo::Vtg(io) => {
BitMutView::new(&mut io.attr_in)
.set_bit(user_attribute_index, true);
}
ShaderIoInfo::Fragment(io) => {
io.attr_in[user_attribute_index] = interp_mode;
}
_ => {}
}
} else if attribute_id >= 0x2c0 && attribute_id < 0x300 {
match &mut self.info.io {
ShaderIoInfo::None => {
panic!("Stage does not support load_interpolated_input")
}
ShaderIoInfo::Vtg(VtgIoInfo {
sysvals_in, ..
})
| ShaderIoInfo::Fragment(FragmentIoInfo {
sysvals_in,
..
}) => {
sysvals_in.c |=
1 << ((attribute_id - 0x2c0) / 4);
}
}
}
io.mark_attr_read(c_addr, interp_mode);
b.push_op(OpIpa {
dst: dst[usize::from(c)].into(),
addr: attribute_id,
addr: c_addr,
freq: freq,
loc: loc,
offset: offset,
@@ -1757,6 +1678,9 @@ impl<'a> ShaderFromNir<'a> {
});
}
nir_intrinsic_store_output => {
assert!(intrin.get_src(0).bit_size() == 32);
let comps = intrin.num_components;
let data = self.get_src(&srcs[0]);
let vtx = Src::new_zero();
let offset = self.get_src(&srcs[1]);
@@ -1773,7 +1697,7 @@ impl<'a> ShaderFromNir<'a> {
assert!(srcs[1].is_zero());
let base: usize = intrin.base().try_into().unwrap();
assert!(base % 4 == 0);
for c in 0..usize::from(intrin.num_components) {
for c in 0..usize::from(comps) {
self.fs_out_regs[(base / 4) + c] = data[c];
}
}
@@ -1781,38 +1705,17 @@ impl<'a> ShaderFromNir<'a> {
let addr = u16::try_from(intrin.base()).unwrap()
+ u16::try_from(intrin.component()).unwrap() * 4;
assert!(intrin.get_src(0).bit_size() == 32);
let addr_range = addr..(addr + 4 * u16::from(comps));
io.mark_attrs_written(addr_range);
let access = AttrAccess {
addr: addr,
comps: intrin.get_src(0).num_components(),
comps: comps,
patch: false,
out_load: false,
flags: 0,
};
let attribute_base_index = access.addr / 4;
for attribute_index in attribute_base_index
..attribute_base_index + access.comps as u16
{
let attribute_id = attribute_index * 4;
if attribute_id < 0x080 {
io.sysvals_out.ab |= 1 << (attribute_id / 4);
} else if attribute_id >= 0x080
&& attribute_id < 0x280
{
BitMutView::new(&mut io.attr_out).set_bit(
(attribute_id as usize - 0x080) / 4,
true,
);
} else if attribute_id >= 0x2c0
&& attribute_id < 0x300
{
io.sysvals_out.c |=
1 << ((attribute_id - 0x2c0) / 4);
}
}
b.push_op(OpASt {
vtx: vtx,
offset: offset,
+54
View File
@@ -4529,6 +4529,45 @@ pub struct VtgIoInfo {
pub store_req_end: u8,
}
impl VtgIoInfo {
    /// Records every attribute slot covered by `addrs` in the input
    /// bookkeeping (`written == false`) or the output bookkeeping
    /// (`written == true`).
    ///
    /// The start of the range is rounded down to a multiple of 4 and the
    /// range is then visited in steps of 4.  Each visited address is
    /// classified as follows:
    ///
    /// * below `0x080`: bit `addr / 4` is set in `sysvals.ab`;
    /// * `0x080..0x280`: bit `(addr - 0x080) / 4` is set in the attribute
    ///   bit set;
    /// * `0x280..0x2c0`: unsupported, see Panics;
    /// * `0x2c0..0x300`: bit `(addr - 0x2c0) / 4` is set in `sysvals.c`;
    /// * `0x300` and above: ignored.
    ///
    /// # Panics
    ///
    /// Panics with "FF color I/O not supported" if a visited address lies
    /// in `0x280..0x2c0`.
    fn mark_attrs(&mut self, addrs: Range<u16>, written: bool) {
        // Pick the input or output pair of tracking fields in one go; the
        // two fields are disjoint, so both can be borrowed mutably.
        let (sysvals, attr_words) = if written {
            (&mut self.sysvals_out, &mut self.attr_out)
        } else {
            (&mut self.sysvals_in, &mut self.attr_in)
        };
        let mut attr = BitMutView::new(attr_words);

        // Align the first address down to a 4-byte boundary so that a
        // range starting mid-slot still marks the slot it begins in.
        let first = addrs.start & !3;
        for addr in (first..addrs.end).step_by(4) {
            match addr {
                0x000..=0x07f => sysvals.ab |= 1 << (addr / 4),
                0x080..=0x27f => {
                    attr.set_bit(usize::from(addr - 0x080) / 4, true);
                }
                0x280..=0x2bf => panic!("FF color I/O not supported"),
                0x2c0..=0x2ff => sysvals.c |= 1 << ((addr - 0x2c0) / 4),
                _ => {}
            }
        }
    }

    /// Marks every attribute slot covered by `addrs` as read.
    ///
    /// # Panics
    ///
    /// Panics if the range visits an address in `0x280..0x2c0`.
    pub fn mark_attrs_read(&mut self, addrs: Range<u16>) {
        self.mark_attrs(addrs, false);
    }

    /// Marks every attribute slot covered by `addrs` as written.
    ///
    /// # Panics
    ///
    /// Panics if the range visits an address in `0x280..0x2c0`.
    pub fn mark_attrs_written(&mut self, addrs: Range<u16>) {
        self.mark_attrs(addrs, true);
    }
}
#[derive(Debug)]
pub struct FragmentIoInfo {
pub sysvals_in: SysValInfo,
@@ -4541,6 +4580,21 @@ pub struct FragmentIoInfo {
pub writes_depth: bool,
}
impl FragmentIoInfo {
    /// Records that the single attribute address `addr` is read, using
    /// `interp` as its interpolation mode where one applies.
    ///
    /// The address is classified as follows:
    ///
    /// * below `0x080`: bit `addr / 4` is set in `sysvals_in.ab`;
    /// * `0x080..0x280`: `attr_in[(addr - 0x080) / 4]` is overwritten with
    ///   `interp`;
    /// * `0x280..0x2c0`: unsupported, see Panics;
    /// * `0x2c0..0x300`: bit `(addr - 0x2c0) / 4` is set in `sysvals_in.c`;
    /// * `0x300` and above: ignored.
    ///
    /// `interp` is only consulted for the `0x080..0x280` range.
    ///
    /// # Panics
    ///
    /// Panics with "FF color I/O not supported" if `addr` lies in
    /// `0x280..0x2c0`.
    pub fn mark_attr_read(&mut self, addr: u16, interp: PixelImap) {
        match addr {
            0x000..=0x07f => self.sysvals_in.ab |= 1 << (addr / 4),
            0x080..=0x27f => {
                let slot = usize::from(addr - 0x080) / 4;
                self.attr_in[slot] = interp;
            }
            0x280..=0x2bf => panic!("FF color I/O not supported"),
            0x2c0..=0x2ff => self.sysvals_in.c |= 1 << ((addr - 0x2c0) / 4),
            _ => {}
        }
    }
}
#[derive(Debug)]
pub enum ShaderIoInfo {
None,