From c4f2a8d984bb32c1508dbecb2b0037511db249ca Mon Sep 17 00:00:00 2001 From: Sagar Ghuge Date: Wed, 14 Aug 2024 21:27:09 -0700 Subject: [PATCH] intel/compiler: Fix indirect offset in GS input read for Xe2+ Make sure to take the new GRF size into consideration and adjust the indirect offset according to the new size so that when we do the indirect load with the address register, we load the right values. This helps pass the following tests: - dEQP-VK.binding_model.descriptor_buffer.mutable_descriptor.*geom* - dEQP-VK.ray_query.*geometry_shader.* Backport-to: 24.2 Signed-off-by: Sagar Ghuge Reviewed-by: Caio Oliveira Part-of: --- src/intel/compiler/brw_fs_nir.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index f20912d4dba..606f457a785 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -2614,11 +2614,13 @@ emit_gs_input_load(nir_to_brw_state &ntb, const brw_reg &dst, brw_reg start = s.gs_payload().icp_handle_start; brw_reg icp_handle = ntb.bld.vgrf(BRW_TYPE_UD); + const unsigned grf_size_bytes = REG_SIZE * reg_unit(devinfo); if (gs_prog_data->invocations == 1) { if (nir_src_is_const(vertex_src)) { /* The vertex index is constant; just select the proper URB handle. */ - icp_handle = offset(start, ntb.bld, nir_src_as_uint(vertex_src)); + icp_handle = + byte_offset(start, nir_src_as_uint(vertex_src) * grf_size_bytes); } else { /* The vertex index is non-constant. We need to use indirect * addressing to fetch the proper URB handle. @@ -2628,17 +2630,18 @@ emit_gs_input_load(nir_to_brw_state &ntb, const brw_reg &dst, * DWord <n>. We convert that to bytes by multiplying by 4. * * Next, we convert the vertex index to bytes by multiplying - * by 32 (shifting by 5), and add the two together. This is + * by 32/64 (shifting by 5/6), and add the two together. This is * the final indirect byte offset.
*/ brw_reg sequence = bld.LOAD_SUBGROUP_INVOCATION(); /* channel_offsets = 4 * sequence = <28, 24, 20, 16, 12, 8, 4, 0> */ brw_reg channel_offsets = bld.SHL(sequence, brw_imm_ud(2u)); - /* Convert vertex_index to bytes (multiply by 32) */ + /* Convert vertex_index to bytes (multiply by 32/64) */ + assert(util_is_power_of_two_nonzero(grf_size_bytes)); /* for ffs() */ brw_reg vertex_offset_bytes = bld.SHL(retype(get_nir_src(ntb, vertex_src), BRW_TYPE_UD), - brw_imm_ud(5u)); + brw_imm_ud(ffs(grf_size_bytes) - 1)); brw_reg icp_offset_bytes = bld.ADD(vertex_offset_bytes, channel_offsets); @@ -2648,7 +2651,7 @@ emit_gs_input_load(nir_to_brw_state &ntb, const brw_reg &dst, */ bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle, start, brw_reg(icp_offset_bytes), - brw_imm_ud(s.nir->info.gs.vertices_in * REG_SIZE)); + brw_imm_ud(s.nir->info.gs.vertices_in * grf_size_bytes)); } } else { assert(gs_prog_data->invocations > 1); @@ -2673,7 +2676,7 @@ emit_gs_input_load(nir_to_brw_state &ntb, const brw_reg &dst, bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle, start, brw_reg(icp_offset_bytes), brw_imm_ud(DIV_ROUND_UP(s.nir->info.gs.vertices_in, 8) * - REG_SIZE)); + grf_size_bytes)); } }