intel/rt: fix ray_query stack address calculation

While the documentation says to use NUM_SIMD_LANES_PER_DSS for the stack
address calculation, what the HW actually uses is
NUM_SYNC_STACKID_PER_DSS. The former may vary depending on the platform,
while the latter is fixed to 2048 for all current platforms.

Fixes: 6c84cbd8c9 ("intel/dev/xe: Set max_eus_per_subslice using topology query")

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32049>
This commit is contained in:
Iván Briano
2024-11-07 15:48:20 -08:00
committed by Marge Bot
parent 7aad19ccd2
commit aee04bf4fb
2 changed files with 16 additions and 20 deletions
@@ -159,7 +159,7 @@ get_ray_query_shadow_addr(nir_builder *b,
nir_imul(
b,
brw_load_btd_dss_id(b),
brw_nir_rt_load_num_simd_lanes_per_dss(b, state->devinfo)),
state->globals.num_dss_rt_stacks),
brw_nir_rt_sync_stack_id(b)),
BRW_RT_SIZEOF_SHADOW_RAY_QUERY);
@@ -232,7 +232,8 @@ lower_ray_query_intrinsic(nir_builder *b,
nir_def *shadow_stack_addr =
get_ray_query_shadow_addr(b, deref, state, &ctrl_level_deref);
nir_def *hw_stack_addr =
brw_nir_rt_sync_stack_addr(b, state->globals.base_mem_addr, state->devinfo);
brw_nir_rt_sync_stack_addr(b, state->globals.base_mem_addr,
state->globals.num_dss_rt_stacks);
nir_def *stack_addr = shadow_stack_addr ? shadow_stack_addr : hw_stack_addr;
switch (intrin->intrinsic) {
+13 -18
View File
@@ -74,15 +74,6 @@ brw_load_btd_dss_id(nir_builder *b)
return nir_load_topology_id_intel(b, .base = BRW_TOPOLOGY_ID_DSS);
}
/* Emit a NIR integer immediate holding the number of SIMD lanes per DSS,
 * computed from the device info as:
 *
 *    num_thread_per_eu * max_eus_per_subslice * 16
 *
 * The value is derived from devinfo at the time this helper is called and
 * baked into the shader as a constant.
 */
static inline nir_def *
brw_nir_rt_load_num_simd_lanes_per_dss(nir_builder *b,
                                       const struct intel_device_info *devinfo)
{
   /* The RT computation is based off SIMD16 */
   const int rt_simd_width = 16;

   const int simd_lanes_per_dss = devinfo->num_thread_per_eu *
                                  devinfo->max_eus_per_subslice *
                                  rt_simd_width;

   return nir_imm_int(b, simd_lanes_per_dss);
}
static inline nir_def *
brw_load_eu_thread_simd(nir_builder *b)
{
@@ -187,23 +178,27 @@ brw_nir_rt_sw_hotzone_addr(nir_builder *b,
static inline nir_def *
brw_nir_rt_sync_stack_addr(nir_builder *b,
nir_def *base_mem_addr,
const struct intel_device_info *devinfo)
nir_def *num_dss_rt_stacks)
{
/* For Ray queries (Synchronous Ray Tracing), the formula is similar but
* goes down from rtMemBasePtr :
/* Bspec 47547 (Xe) and 56936 (Xe2+) say:
* For Ray queries (Synchronous Ray Tracing), the formula is similar but
* goes down from rtMemBasePtr :
*
* syncBase = RTDispatchGlobals.rtMemBasePtr
* - (DSSID * NUM_SIMD_LANES_PER_DSS + SyncStackID + 1)
* * syncStackSize
* syncBase = RTDispatchGlobals.rtMemBasePtr
* - (DSSID * NUM_SIMD_LANES_PER_DSS + SyncStackID + 1)
* * syncStackSize
*
* We assume that we can calculate a 32-bit offset first and then add it
* to the 64-bit base address at the end.
* We assume that we can calculate a 32-bit offset first and then add it
* to the 64-bit base address at the end.
*
* However, on HSD 14020275151 it's clarified that the HW uses
* NUM_SYNC_STACKID_PER_DSS instead.
*/
nir_def *offset32 =
nir_imul(b,
nir_iadd(b,
nir_imul(b, brw_load_btd_dss_id(b),
brw_nir_rt_load_num_simd_lanes_per_dss(b, devinfo)),
num_dss_rt_stacks),
nir_iadd_imm(b, brw_nir_rt_sync_stack_id(b), 1)),
nir_imm_int(b, BRW_RT_SIZEOF_RAY_QUERY));
return nir_isub(b, base_mem_addr, nir_u2u64(b, offset32));