/* Patch summary (commentary placed ahead of the first diff header):
 *
 * Touches brw_nir_lower_ray_queries.c and brw_nir_rt_builder.h.
 *
 * - Removes the helper brw_nir_rt_load_num_simd_lanes_per_dss(), which
 *   produced an immediate equal to
 *   devinfo->num_thread_per_eu * devinfo->max_eus_per_subslice * 16.
 * - brw_nir_rt_sync_stack_addr() now takes a `nir_def *num_dss_rt_stacks`
 *   argument in place of the `const struct intel_device_info *devinfo`
 *   argument and multiplies the DSS id by that value.
 * - get_ray_query_shadow_addr() and lower_ray_query_intrinsic() now pass
 *   state->globals.num_dss_rt_stacks where they previously used the removed
 *   helper / state->devinfo.
 * - The rewritten in-code comment cites Bspec 47547 and 56936 for the
 *   formula and HSD 14020275151 for the use of NUM_SYNC_STACKID_PER_DSS.
 *
 * NOTE(review): several hunk headers ("@@ ... @@") share one physical line
 * below, so the original line breaks of this patch appear to have been
 * lost; restore them before trying to apply it.
 * NOTE(review): any caller of brw_nir_rt_sync_stack_addr() outside these two
 * files would still pass devinfo; none is visible here, confirm against the
 * tree.
 */
diff --git a/src/intel/compiler/brw_nir_lower_ray_queries.c b/src/intel/compiler/brw_nir_lower_ray_queries.c index 844b90da47b..bc951104378 100644 --- a/src/intel/compiler/brw_nir_lower_ray_queries.c +++ b/src/intel/compiler/brw_nir_lower_ray_queries.c @@ -159,7 +159,7 @@ get_ray_query_shadow_addr(nir_builder *b, nir_imul( b, brw_load_btd_dss_id(b), - brw_nir_rt_load_num_simd_lanes_per_dss(b, state->devinfo)), + state->globals.num_dss_rt_stacks), brw_nir_rt_sync_stack_id(b)), BRW_RT_SIZEOF_SHADOW_RAY_QUERY); @@ -232,7 +232,8 @@ lower_ray_query_intrinsic(nir_builder *b, nir_def *shadow_stack_addr = get_ray_query_shadow_addr(b, deref, state, &ctrl_level_deref); nir_def *hw_stack_addr = - brw_nir_rt_sync_stack_addr(b, state->globals.base_mem_addr, state->devinfo); + brw_nir_rt_sync_stack_addr(b, state->globals.base_mem_addr, + state->globals.num_dss_rt_stacks); nir_def *stack_addr = shadow_stack_addr ? shadow_stack_addr : hw_stack_addr; switch (intrin->intrinsic) { diff --git a/src/intel/compiler/brw_nir_rt_builder.h b/src/intel/compiler/brw_nir_rt_builder.h index 6283ec2c12e..d0f71b13e27 100644 --- a/src/intel/compiler/brw_nir_rt_builder.h +++ b/src/intel/compiler/brw_nir_rt_builder.h @@ -74,15 +74,6 @@ brw_load_btd_dss_id(nir_builder *b) return nir_load_topology_id_intel(b, .base = BRW_TOPOLOGY_ID_DSS); } -static inline nir_def * -brw_nir_rt_load_num_simd_lanes_per_dss(nir_builder *b, - const struct intel_device_info *devinfo) -{ - return nir_imm_int(b, devinfo->num_thread_per_eu * - devinfo->max_eus_per_subslice * - 16 /* The RT computation is based off SIMD16 */); -} - static inline nir_def * brw_load_eu_thread_simd(nir_builder *b) { @@ -187,23 +178,27 @@ brw_nir_rt_sw_hotzone_addr(nir_builder *b, static inline nir_def * brw_nir_rt_sync_stack_addr(nir_builder *b, nir_def *base_mem_addr, - const struct intel_device_info *devinfo) + nir_def *num_dss_rt_stacks) { - /* For Ray queries (Synchronous Ray Tracing), the formula is similar but - * goes down from 
rtMemBasePtr : + /* Bspec 47547 (Xe) and 56936 (Xe2+) say: + * For Ray queries (Synchronous Ray Tracing), the formula is similar but + * goes down from rtMemBasePtr : * - * syncBase = RTDispatchGlobals.rtMemBasePtr - * - (DSSID * NUM_SIMD_LANES_PER_DSS + SyncStackID + 1) - * * syncStackSize + * syncBase = RTDispatchGlobals.rtMemBasePtr + * - (DSSID * NUM_SIMD_LANES_PER_DSS + SyncStackID + 1) + * * syncStackSize * - * We assume that we can calculate a 32-bit offset first and then add it - * to the 64-bit base address at the end. + * We assume that we can calculate a 32-bit offset first and then add it + * to the 64-bit base address at the end. + * + * However, on HSD 14020275151 it's clarified that the HW uses + * NUM_SYNC_STACKID_PER_DSS instead. */ nir_def *offset32 = nir_imul(b, nir_iadd(b, nir_imul(b, brw_load_btd_dss_id(b), - brw_nir_rt_load_num_simd_lanes_per_dss(b, devinfo)), + num_dss_rt_stacks), nir_iadd_imm(b, brw_nir_rt_sync_stack_id(b), 1)), nir_imm_int(b, BRW_RT_SIZEOF_RAY_QUERY)); return nir_isub(b, base_mem_addr, nir_u2u64(b, offset32));