From b78f7d208b10fbb9ce0c3a47aa672a9e074cb34f Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Fri, 26 Jul 2024 12:50:33 -0500 Subject: [PATCH] nak: Plumb through the call/return stack size Part-of: --- src/nouveau/compiler/nak.h | 3 +++ src/nouveau/compiler/nak/api.rs | 1 + src/nouveau/compiler/nak/from_nir.rs | 1 + src/nouveau/compiler/nak/hw_tests.rs | 1 + src/nouveau/compiler/nak/ir.rs | 2 ++ src/nouveau/compiler/nak/sm50.rs | 10 ++++++++++ src/nouveau/compiler/nak/sm70.rs | 5 +++++ src/nouveau/compiler/nak/sph.rs | 2 ++ src/nouveau/compiler/nak_qmd/lib.rs | 20 ++++++++++++++++++++ 9 files changed, 45 insertions(+) diff --git a/src/nouveau/compiler/nak.h b/src/nouveau/compiler/nak.h index d02b0e38226..0e2bfbb5418 100644 --- a/src/nouveau/compiler/nak.h +++ b/src/nouveau/compiler/nak.h @@ -115,6 +115,9 @@ struct nak_shader_info { /** Size of shader local (scratch) memory */ uint32_t slm_size; + /** Size of call/return stack in bytes/warp */ + uint32_t crs_size; + union { struct { /* Local workgroup size */ diff --git a/src/nouveau/compiler/nak/api.rs b/src/nouveau/compiler/nak/api.rs index cd862bc5f0c..0641d25dc39 100644 --- a/src/nouveau/compiler/nak/api.rs +++ b/src/nouveau/compiler/nak/api.rs @@ -238,6 +238,7 @@ impl ShaderBin { _pad0: Default::default(), num_instrs: info.num_instrs, slm_size: info.slm_size, + crs_size: sm.crs_size(info.max_crs_depth), __bindgen_anon_1: match &info.stage { ShaderStageInfo::Compute(cs_info) => { nak_shader_info__bindgen_ty_1 { diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs index 635b965a533..8bf5aa038ea 100644 --- a/src/nouveau/compiler/nak/from_nir.rs +++ b/src/nouveau/compiler/nak/from_nir.rs @@ -24,6 +24,7 @@ fn init_info_from_nir(nir: &nir_shader) -> ShaderInfo { num_instrs: 0, num_control_barriers: 0, slm_size: nir.scratch_size, + max_crs_depth: 0, uses_global_mem: false, writes_global_mem: false, // TODO: handle this. diff --git a/src/nouveau/compiler/nak/hw_tests.rs b/src/nouveau/compiler/nak/hw_tests.rs index 2d2dc43ed2c..dcf5afe82da 100644 --- a/src/nouveau/compiler/nak/hw_tests.rs +++ b/src/nouveau/compiler/nak/hw_tests.rs @@ -222,6 +222,7 @@ impl<'a> TestShaderBuilder<'a> { num_control_barriers: 0, num_instrs: 0, slm_size: 0, + max_crs_depth: 0, uses_global_mem: true, writes_global_mem: true, uses_fp64: false, diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs index 232a722d53e..43fbbf9468e 100644 --- a/src/nouveau/compiler/nak/ir.rs +++ b/src/nouveau/compiler/nak/ir.rs @@ -7096,6 +7096,7 @@ pub struct ShaderInfo { pub num_control_barriers: u8, pub num_instrs: u32, pub slm_size: u32, + pub max_crs_depth: u32, pub uses_global_mem: bool, pub writes_global_mem: bool, pub uses_fp64: bool, @@ -7106,6 +7107,7 @@ pub struct ShaderInfo { pub trait ShaderModel { fn sm(&self) -> u8; fn num_regs(&self, file: RegFile) -> u32; + fn crs_size(&self, max_crs_depth: u32) -> u32; fn op_can_be_uniform(&self, op: &Op) -> bool; diff --git a/src/nouveau/compiler/nak/sm50.rs b/src/nouveau/compiler/nak/sm50.rs index a92b1eaa5fc..c09fb6bacba 100644 --- a/src/nouveau/compiler/nak/sm50.rs +++ b/src/nouveau/compiler/nak/sm50.rs @@ -38,6 +38,16 @@ impl ShaderModel for ShaderModel50 { } } + fn crs_size(&self, max_crs_depth: u32) -> u32 { + if max_crs_depth <= 16 { + 0 + } else if max_crs_depth <= 32 { + 1024 + } else { + ((max_crs_depth + 32) * 16).next_multiple_of(512) + } + } + fn op_can_be_uniform(&self, _op: &Op) -> bool { false } diff --git a/src/nouveau/compiler/nak/sm70.rs b/src/nouveau/compiler/nak/sm70.rs index fe162185c0d..a391213b57c 100644 --- a/src/nouveau/compiler/nak/sm70.rs +++ b/src/nouveau/compiler/nak/sm70.rs @@ -60,6 +60,11 @@ impl ShaderModel for ShaderModel70 { } } + fn crs_size(&self, max_crs_depth: u32) -> u32 { + assert!(max_crs_depth == 0); + 0 + } + fn op_can_be_uniform(&self, op: &Op) -> bool { if !self.has_uniform_alu() { return false; diff --git a/src/nouveau/compiler/nak/sph.rs b/src/nouveau/compiler/nak/sph.rs index ea4a59d63c1..a9e38082a01 100644 --- a/src/nouveau/compiler/nak/sph.rs +++ b/src/nouveau/compiler/nak/sph.rs @@ -480,6 +480,8 @@ pub fn encode_header( let slm_size = shader_info.slm_size.next_multiple_of(16); sph.set_shader_local_memory_size(slm_size.into()); + let crs_size = sm.crs_size(shader_info.max_crs_depth); + sph.set_shader_local_memory_crs_size(crs_size); match &shader_info.io { ShaderIoInfo::Vtg(io) => { diff --git a/src/nouveau/compiler/nak_qmd/lib.rs b/src/nouveau/compiler/nak_qmd/lib.rs index 6e1496a8596..7d0350bc4d4 100644 --- a/src/nouveau/compiler/nak_qmd/lib.rs +++ b/src/nouveau/compiler/nak_qmd/lib.rs @@ -23,6 +23,7 @@ trait QMD { fn set_local_size(&mut self, width: u16, height: u16, depth: u16); fn set_prog_addr(&mut self, addr: u64); fn set_register_count(&mut self, register_count: u8); + fn set_crs_size(&mut self, crs_size: u32); fn set_slm_size(&mut self, slm_size: u32); fn set_smem_size(&mut self, smem_size: u32, smem_max: u32); } @@ -94,6 +95,16 @@ macro_rules! qmd_impl_common { }; } +macro_rules! qmd_impl_set_crs_size { + ($c:ident, $s:ident) => { + fn set_crs_size(&mut self, crs_size: u32) { + let mut bv = QMDBitView::new(&mut self.qmd); + let crs_size = crs_size.next_multiple_of(0x200); + set_field!(bv, $c, $s, SHADER_LOCAL_MEMORY_CRS_SIZE, crs_size); + } + }; +} + const SIZE_SHIFT: u8 = 0; const SIZE_SHIFTED4_SHIFT: u8 = 4; @@ -170,6 +181,7 @@ mod qmd_0_6 { } qmd_impl_common!(cla0c0, QMDV00_06); + qmd_impl_set_crs_size!(cla0c0, QMDV00_06); qmd_impl_set_cbuf!(cla0c0, QMDV00_06, SIZE); qmd_impl_set_prog_addr_32!(cla0c0, QMDV00_06); qmd_impl_set_register_count!(cla0c0, QMDV00_06, REGISTER_COUNT); @@ -214,6 +226,7 @@ mod qmd_2_1 { } qmd_impl_common!(clc0c0, QMDV02_01); + qmd_impl_set_crs_size!(clc0c0, QMDV02_01); qmd_impl_set_cbuf!(clc0c0, QMDV02_01, SIZE_SHIFTED4); qmd_impl_set_prog_addr_32!(clc0c0, QMDV02_01); qmd_impl_set_register_count!(clc0c0, QMDV02_01, REGISTER_COUNT); @@ -281,6 +294,7 @@ mod qmd_2_2 { } qmd_impl_common!(clc3c0, QMDV02_02); + qmd_impl_set_crs_size!(clc3c0, QMDV02_02); qmd_impl_set_cbuf!(clc3c0, QMDV02_02, SIZE_SHIFTED4); qmd_impl_set_prog_addr_64!(clc3c0, QMDV02_02); qmd_impl_set_register_count!(clc3c0, QMDV02_02, REGISTER_COUNT_V); @@ -308,6 +322,11 @@ mod qmd_3_0 { } qmd_impl_common!(clc6c0, QMDV03_00); + + fn set_crs_size(&mut self, crs_size: u32) { + assert!(crs_size == 0); + } + qmd_impl_set_cbuf!(clc6c0, QMDV03_00, SIZE_SHIFTED4); qmd_impl_set_prog_addr_64!(clc6c0, QMDV03_00); qmd_impl_set_register_count!(clc6c0, QMDV03_00, REGISTER_COUNT_V); @@ -337,6 +356,7 @@ fn fill_qmd(info: &nak_shader_info, qmd_info: &nak_qmd_info) -> Q { ); qmd.set_prog_addr(qmd_info.addr); qmd.set_register_count(info.num_gprs); + qmd.set_crs_size(info.crs_size); qmd.set_slm_size(info.slm_size); assert!(qmd_info.smem_size >= cs_info.smem_size);