|
|
|
@@ -25,7 +25,7 @@ trait QMD {
|
|
|
|
|
fn set_register_count(&mut self, register_count: u8);
|
|
|
|
|
fn set_crs_size(&mut self, crs_size: u32);
|
|
|
|
|
fn set_slm_size(&mut self, slm_size: u32);
|
|
|
|
|
fn set_smem_size(&mut self, smem_size: u32, smem_max: u32);
|
|
|
|
|
fn set_smem_size(&mut self, smem_size: u32, smem_sizes: &[u16]);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
macro_rules! set_enum {
|
|
|
|
@@ -282,7 +282,7 @@ mod qmd_0_6 {
|
|
|
|
|
qmd_impl_set_register_count!(cla0c0, QMDV00_06, REGISTER_COUNT);
|
|
|
|
|
qmd_impl_set_slm_size!(cla0c0, QMDV00_06, NONE);
|
|
|
|
|
|
|
|
|
|
fn set_smem_size(&mut self, smem_size: u32, _smem_max: u32) {
|
|
|
|
|
fn set_smem_size(&mut self, smem_size: u32, _smem_sizes: &[u16]) {
|
|
|
|
|
let mut bv = QMDBitView::new(&mut self.qmd);
|
|
|
|
|
|
|
|
|
|
let smem_size = smem_size.next_multiple_of(0x100);
|
|
|
|
@@ -328,7 +328,7 @@ mod qmd_2_1 {
|
|
|
|
|
qmd_impl_set_register_count!(clc0c0, QMDV02_01, REGISTER_COUNT);
|
|
|
|
|
qmd_impl_set_slm_size!(clc0c0, QMDV02_01, NONE);
|
|
|
|
|
|
|
|
|
|
fn set_smem_size(&mut self, smem_size: u32, _smem_max: u32) {
|
|
|
|
|
fn set_smem_size(&mut self, smem_size: u32, _smem_sizes: &[u16]) {
|
|
|
|
|
let mut bv = QMDBitView::new(&mut self.qmd);
|
|
|
|
|
|
|
|
|
|
let smem_size = smem_size.next_multiple_of(0x100);
|
|
|
|
@@ -338,43 +338,41 @@ mod qmd_2_1 {
|
|
|
|
|
}
|
|
|
|
|
use qmd_2_1::Qmd2_1;
|
|
|
|
|
|
|
|
|
|
fn gv100_sm_config_smem_size(size: u32) -> u32 {
|
|
|
|
|
let size = if size > 64 * 1024 {
|
|
|
|
|
96 * 1024
|
|
|
|
|
} else if size > 32 * 1024 {
|
|
|
|
|
64 * 1024
|
|
|
|
|
} else if size > 16 * 1024 {
|
|
|
|
|
32 * 1024
|
|
|
|
|
} else if size > 8 * 1024 {
|
|
|
|
|
16 * 1024
|
|
|
|
|
} else {
|
|
|
|
|
8 * 1024
|
|
|
|
|
};
|
|
|
|
|
fn gv100_smem_size_to_hw(size_kb: u16) -> u16 {
|
|
|
|
|
assert!(size_kb % 4 == 0);
|
|
|
|
|
(size_kb / 4) + 1
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
size / 4096 + 1
|
|
|
|
|
fn gv100_pick_smem_size_kb(size: u32, smem_sizes_kb: &[u16]) -> u16 {
|
|
|
|
|
*smem_sizes_kb
|
|
|
|
|
.iter()
|
|
|
|
|
.find(|&&val| u32::from(val) * 1024 >= size)
|
|
|
|
|
.expect("Requested shared memory not supported by the hw.")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
macro_rules! qmd_impl_set_smem_size_bounded {
|
|
|
|
|
($c:ident, $s:ident) => {
|
|
|
|
|
fn set_smem_size(&mut self, smem_size: u32, smem_max: u32) {
|
|
|
|
|
fn set_smem_size(&mut self, smem_size: u32, smem_sizes: &[u16]) {
|
|
|
|
|
let mut bv = QMDBitView::new(&mut self.qmd);
|
|
|
|
|
|
|
|
|
|
let smem_size = smem_size.next_multiple_of(0x100);
|
|
|
|
|
set_field!(bv, $c, $s, SHARED_MEMORY_SIZE, smem_size);
|
|
|
|
|
|
|
|
|
|
let max = gv100_sm_config_smem_size(smem_max);
|
|
|
|
|
let min = gv100_sm_config_smem_size(smem_size.into());
|
|
|
|
|
let target = gv100_sm_config_smem_size(smem_size.into());
|
|
|
|
|
set_field!(bv, $c, $s, MIN_SM_CONFIG_SHARED_MEM_SIZE, min);
|
|
|
|
|
set_field!(bv, $c, $s, MAX_SM_CONFIG_SHARED_MEM_SIZE, max);
|
|
|
|
|
set_field!(bv, $c, $s, TARGET_SM_CONFIG_SHARED_MEM_SIZE, target);
|
|
|
|
|
let smem_size_kb = gv100_pick_smem_size_kb(smem_size, smem_sizes);
|
|
|
|
|
let smem_hw = gv100_smem_size_to_hw(smem_size_kb);
|
|
|
|
|
let smem_hw_max =
|
|
|
|
|
gv100_smem_size_to_hw(*smem_sizes.last().unwrap());
|
|
|
|
|
|
|
|
|
|
set_field!(bv, $c, $s, MIN_SM_CONFIG_SHARED_MEM_SIZE, smem_hw);
|
|
|
|
|
set_field!(bv, $c, $s, MAX_SM_CONFIG_SHARED_MEM_SIZE, smem_hw_max);
|
|
|
|
|
set_field!(bv, $c, $s, TARGET_SM_CONFIG_SHARED_MEM_SIZE, smem_hw);
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
macro_rules! qmd_impl_set_smem_size_bounded_gb {
|
|
|
|
|
($c:ident, $s:ident) => {
|
|
|
|
|
fn set_smem_size(&mut self, smem_size: u32, smem_max: u32) {
|
|
|
|
|
fn set_smem_size(&mut self, smem_size: u32, smem_sizes: &[u16]) {
|
|
|
|
|
let mut bv = QMDBitView::new(&mut self.qmd);
|
|
|
|
|
|
|
|
|
|
let smem_size = smem_size.next_multiple_of(0x100);
|
|
|
|
@@ -389,12 +387,14 @@ macro_rules! qmd_impl_set_smem_size_bounded_gb {
|
|
|
|
|
smem_size_shifted
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
let max = gv100_sm_config_smem_size(smem_max);
|
|
|
|
|
let min = gv100_sm_config_smem_size(smem_size.into());
|
|
|
|
|
let target = gv100_sm_config_smem_size(smem_size.into());
|
|
|
|
|
set_field!(bv, $c, $s, MIN_SM_CONFIG_SHARED_MEM_SIZE, min);
|
|
|
|
|
set_field!(bv, $c, $s, MAX_SM_CONFIG_SHARED_MEM_SIZE, max);
|
|
|
|
|
set_field!(bv, $c, $s, TARGET_SM_CONFIG_SHARED_MEM_SIZE, target);
|
|
|
|
|
let smem_size_kb = gv100_pick_smem_size_kb(smem_size, smem_sizes);
|
|
|
|
|
let smem_hw = gv100_smem_size_to_hw(smem_size_kb);
|
|
|
|
|
let smem_hw_max =
|
|
|
|
|
gv100_smem_size_to_hw(*smem_sizes.last().unwrap());
|
|
|
|
|
|
|
|
|
|
set_field!(bv, $c, $s, MIN_SM_CONFIG_SHARED_MEM_SIZE, smem_hw);
|
|
|
|
|
set_field!(bv, $c, $s, MAX_SM_CONFIG_SHARED_MEM_SIZE, smem_hw_max);
|
|
|
|
|
set_field!(bv, $c, $s, TARGET_SM_CONFIG_SHARED_MEM_SIZE, smem_hw);
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
@@ -545,7 +545,11 @@ mod qmd_5_0 {
|
|
|
|
|
}
|
|
|
|
|
use qmd_5_0::Qmd5_0;
|
|
|
|
|
|
|
|
|
|
fn fill_qmd<Q: QMD>(info: &nak_shader_info, qmd_info: &nak_qmd_info) -> Q {
|
|
|
|
|
fn fill_qmd<Q: QMD>(
|
|
|
|
|
dev: &nv_device_info,
|
|
|
|
|
info: &nak_shader_info,
|
|
|
|
|
qmd_info: &nak_qmd_info,
|
|
|
|
|
) -> Q {
|
|
|
|
|
let cs_info = unsafe {
|
|
|
|
|
assert!(info.stage == MESA_SHADER_COMPUTE);
|
|
|
|
|
&info.__bindgen_anon_1.cs
|
|
|
|
@@ -569,9 +573,10 @@ fn fill_qmd<Q: QMD>(info: &nak_shader_info, qmd_info: &nak_qmd_info) -> Q {
|
|
|
|
|
qmd.set_crs_size(info.crs_size);
|
|
|
|
|
qmd.set_slm_size(info.slm_size);
|
|
|
|
|
|
|
|
|
|
assert!(qmd_info.smem_size >= u32::from(cs_info.smem_size));
|
|
|
|
|
assert!(qmd_info.smem_size <= qmd_info.smem_max);
|
|
|
|
|
qmd.set_smem_size(qmd_info.smem_size.into(), qmd_info.smem_max.into());
|
|
|
|
|
assert!(qmd_info.smem_size <= u32::from(dev.max_smem_per_wg_kB) * 1024);
|
|
|
|
|
|
|
|
|
|
let smem_sizes = &dev.sm_smem_sizes_kB[0..dev.sm_smem_size_count.into()];
|
|
|
|
|
qmd.set_smem_size(qmd_info.smem_size.into(), smem_sizes);
|
|
|
|
|
|
|
|
|
|
for i in 0..qmd_info.num_cbufs {
|
|
|
|
|
let cb = &qmd_info.cbufs[usize::try_from(i).unwrap()];
|
|
|
|
@@ -625,27 +630,27 @@ pub extern "C" fn nak_fill_qmd(
|
|
|
|
|
if dev.cls_compute >= clcdc0::BLACKWELL_COMPUTE_A {
|
|
|
|
|
let qmd_out = qmd_out as *mut Qmd5_0;
|
|
|
|
|
assert!(qmd_size == size_of_val(&*qmd_out));
|
|
|
|
|
qmd_out.write(fill_qmd(info, qmd_info));
|
|
|
|
|
qmd_out.write(fill_qmd(dev, info, qmd_info));
|
|
|
|
|
} else if dev.cls_compute >= clcbc0::HOPPER_COMPUTE_A {
|
|
|
|
|
let qmd_out = qmd_out as *mut Qmd4_0;
|
|
|
|
|
assert!(qmd_size == size_of_val(&*qmd_out));
|
|
|
|
|
qmd_out.write(fill_qmd(info, qmd_info));
|
|
|
|
|
qmd_out.write(fill_qmd(dev, info, qmd_info));
|
|
|
|
|
} else if dev.cls_compute >= clc6c0::AMPERE_COMPUTE_A {
|
|
|
|
|
let qmd_out = qmd_out as *mut Qmd3_0;
|
|
|
|
|
assert!(qmd_size == size_of_val(&*qmd_out));
|
|
|
|
|
qmd_out.write(fill_qmd(info, qmd_info));
|
|
|
|
|
qmd_out.write(fill_qmd(dev, info, qmd_info));
|
|
|
|
|
} else if dev.cls_compute >= clc3c0::VOLTA_COMPUTE_A {
|
|
|
|
|
let qmd_out = qmd_out as *mut Qmd2_2;
|
|
|
|
|
assert!(qmd_size == size_of_val(&*qmd_out));
|
|
|
|
|
qmd_out.write(fill_qmd(info, qmd_info));
|
|
|
|
|
qmd_out.write(fill_qmd(dev, info, qmd_info));
|
|
|
|
|
} else if dev.cls_compute >= clc0c0::PASCAL_COMPUTE_A {
|
|
|
|
|
let qmd_out = qmd_out as *mut Qmd2_1;
|
|
|
|
|
assert!(qmd_size == size_of_val(&*qmd_out));
|
|
|
|
|
qmd_out.write(fill_qmd(info, qmd_info));
|
|
|
|
|
qmd_out.write(fill_qmd(dev, info, qmd_info));
|
|
|
|
|
} else if dev.cls_compute >= cla0c0::KEPLER_COMPUTE_A {
|
|
|
|
|
let qmd_out = qmd_out as *mut Qmd0_6;
|
|
|
|
|
assert!(qmd_size == size_of_val(&*qmd_out));
|
|
|
|
|
qmd_out.write(fill_qmd(info, qmd_info));
|
|
|
|
|
qmd_out.write(fill_qmd(dev, info, qmd_info));
|
|
|
|
|
} else {
|
|
|
|
|
panic!("Unknown shader model");
|
|
|
|
|
}
|
|
|
|
|