From d831f38d1139c0fd612c371629e47549eab756d1 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Mon, 8 Dec 2025 01:00:57 -0800
Subject: [PATCH] brw: Delete all the old backend mesh/task URB handling code

This has all been replaced by NIR lowering to URB intrinsics.

Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38918>
---
 src/intel/compiler/brw/brw_from_nir.cpp | 645 +-----------------------
 1 file changed, 10 insertions(+), 635 deletions(-)

diff --git a/src/intel/compiler/brw/brw_from_nir.cpp b/src/intel/compiler/brw/brw_from_nir.cpp
index 19a183aceb2..78a868021b9 100644
--- a/src/intel/compiler/brw/brw_from_nir.cpp
+++ b/src/intel/compiler/brw/brw_from_nir.cpp
@@ -4872,15 +4872,6 @@ get_timestamp(const brw_builder &bld)
    return dst;
 }
 
-static unsigned
-component_from_intrinsic(nir_intrinsic_instr *instr)
-{
-   if (nir_intrinsic_has_component(instr))
-      return nir_intrinsic_component(instr);
-   else
-      return 0;
-}
-
 static void
 adjust_handle_and_offset(const brw_builder &bld,
                          brw_reg &urb_handle,
@@ -4899,563 +4890,10 @@ adjust_handle_and_offset(const brw_builder &bld,
 }
 
 static void
-emit_urb_direct_vec4_write(const brw_builder &bld,
-                           unsigned urb_global_offset,
-                           const brw_reg &src,
-                           brw_reg urb_handle,
-                           unsigned dst_comp_offset,
-                           unsigned comps,
-                           unsigned mask)
-{
-   assert(bld.shader->devinfo->ver < 20);
-
-   for (unsigned q = 0; q < bld.dispatch_width() / 8; q++) {
-      brw_builder bld8 = bld.group(8, q);
-
-      brw_reg payload_srcs[8];
-      unsigned length = 0;
-
-      for (unsigned i = 0; i < dst_comp_offset; i++)
-         payload_srcs[length++] = reg_undef;
-
-      for (unsigned c = 0; c < comps; c++)
-         payload_srcs[length++] = quarter(offset(src, bld, c), q);
-
-      brw_reg srcs[URB_LOGICAL_NUM_SRCS];
-      srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
-      srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask);
-      srcs[URB_LOGICAL_SRC_DATA] =
-         retype(brw_allocate_vgrf_units(*bld.shader, length), BRW_TYPE_F);
-      bld8.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0);
-
-      brw_urb_inst *urb = bld8.URB_WRITE(srcs, ARRAY_SIZE(srcs));
-      urb->offset = urb_global_offset;
-      urb->components = length;
-      assert(urb->offset < 2048);
-   }
-}
-
-static void
-emit_urb_direct_writes(const brw_builder &bld, nir_intrinsic_instr *instr,
-                       const brw_reg &src, brw_reg urb_handle)
-{
-   assert(bld.shader->devinfo->ver < 20);
-   assert(nir_src_bit_size(instr->src[0]) == 32);
-
-   nir_src *offset_nir_src = nir_get_io_offset_src(instr);
-   assert(nir_src_is_const(*offset_nir_src));
-
-   const unsigned comps = nir_src_num_components(instr->src[0]);
-   assert(comps <= 4);
-
-   const unsigned offset_in_dwords = nir_intrinsic_base(instr) +
-                                     nir_src_as_uint(*offset_nir_src) +
-                                     component_from_intrinsic(instr);
-
-   /* URB writes are vec4 aligned but the intrinsic offsets are in dwords.
-    * We can write up to 8 dwords, so single vec4 write is enough.
-    */
-   const unsigned comp_shift = offset_in_dwords % 4;
-   const unsigned mask = nir_intrinsic_write_mask(instr) << comp_shift;
-
-   unsigned urb_global_offset = offset_in_dwords / 4;
-   adjust_handle_and_offset(bld, urb_handle, urb_global_offset);
-
-   emit_urb_direct_vec4_write(bld, urb_global_offset, src, urb_handle,
-                              comp_shift, comps, mask);
-}
-
-static void
-emit_urb_direct_vec4_write_xe2(const brw_builder &bld,
-                               unsigned offset_in_bytes,
-                               const brw_reg &src,
-                               brw_reg urb_handle,
-                               unsigned comps,
-                               unsigned mask)
-{
-   const struct intel_device_info *devinfo = bld.shader->devinfo;
-   const unsigned runit = reg_unit(devinfo);
-   const unsigned write_size = 8 * runit;
-
-   if (offset_in_bytes > 0) {
-      brw_builder bldall = bld.group(write_size, 0).exec_all();
-      urb_handle = bldall.ADD(urb_handle, brw_imm_ud(offset_in_bytes));
-   }
-
-   for (unsigned q = 0; q < bld.dispatch_width() / write_size; q++) {
-      brw_builder hbld = bld.group(write_size, q);
-
-      assert(comps <= 4);
-      brw_reg payload_srcs[4];
-
-      for (unsigned c = 0; c < comps; c++)
-         payload_srcs[c] = horiz_offset(offset(src, bld, c), write_size * q);
-
-      brw_reg srcs[URB_LOGICAL_NUM_SRCS];
-      srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
-      srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask);
-      srcs[URB_LOGICAL_SRC_DATA] =
-         retype(brw_allocate_vgrf_units(*bld.shader, comps * runit), BRW_TYPE_F);
-      hbld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, comps, 0);
-
-      brw_urb_inst *urb = hbld.URB_WRITE(srcs, ARRAY_SIZE(srcs));
-      urb->components = comps;
-   }
-}
-
-static void
-emit_urb_direct_writes_xe2(const brw_builder &bld, nir_intrinsic_instr *instr,
-                           const brw_reg &src, brw_reg urb_handle)
-{
-   assert(nir_src_bit_size(instr->src[0]) == 32);
-
-   nir_src *offset_nir_src = nir_get_io_offset_src(instr);
-   assert(nir_src_is_const(*offset_nir_src));
-
-   const unsigned comps = nir_src_num_components(instr->src[0]);
-   assert(comps <= 4);
-
-   const unsigned offset_in_dwords = nir_intrinsic_base(instr) +
-                                     nir_src_as_uint(*offset_nir_src) +
-                                     component_from_intrinsic(instr);
-
-   const unsigned mask = nir_intrinsic_write_mask(instr);
-
-   emit_urb_direct_vec4_write_xe2(bld, offset_in_dwords * 4, src,
-                                    urb_handle, comps, mask);
-}
-
-static void
-emit_urb_indirect_vec4_write(const brw_builder &bld,
-                             const brw_reg &offset_src,
-                             unsigned base,
-                             const brw_reg &src,
-                             brw_reg urb_handle,
-                             unsigned dst_comp_offset,
-                             unsigned comps,
-                             unsigned mask)
-{
-   assert(bld.shader->devinfo->ver < 20);
-
-   for (unsigned q = 0; q < bld.dispatch_width() / 8; q++) {
-      brw_builder bld8 = bld.group(8, q);
-
-      /* offset is always positive, so signedness doesn't matter */
-      assert(offset_src.type == BRW_TYPE_D || offset_src.type == BRW_TYPE_UD);
-      brw_reg qtr = bld8.MOV(quarter(retype(offset_src, BRW_TYPE_UD), q));
-      brw_reg off = bld8.SHR(bld8.ADD(qtr, brw_imm_ud(base)), brw_imm_ud(2));
-
-      brw_reg payload_srcs[8];
-      unsigned length = 0;
-
-      for (unsigned i = 0; i < dst_comp_offset; i++)
-         payload_srcs[length++] = reg_undef;
-
-      for (unsigned c = 0; c < comps; c++)
-         payload_srcs[length++] = quarter(offset(src, bld, c), q);
-
-      brw_reg srcs[URB_LOGICAL_NUM_SRCS];
-      srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
-      srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = off;
-      srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask);
-      srcs[URB_LOGICAL_SRC_DATA] =
-         retype(brw_allocate_vgrf_units(*bld.shader, length), BRW_TYPE_F);
-      bld8.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0);
-
-      brw_urb_inst *urb = bld8.URB_WRITE(srcs, ARRAY_SIZE(srcs));
-      urb->components = length;
-   }
-}
-
-static void
-emit_urb_indirect_writes_mod(const brw_builder &bld, nir_intrinsic_instr *instr,
-                             const brw_reg &src, const brw_reg &offset_src,
-                             brw_reg urb_handle, unsigned mod)
-{
-   assert(bld.shader->devinfo->ver < 20);
-   assert(nir_src_bit_size(instr->src[0]) == 32);
-
-   const unsigned comps = nir_src_num_components(instr->src[0]);
-   assert(comps <= 4);
-
-   const unsigned base_in_dwords = nir_intrinsic_base(instr) +
-                                   component_from_intrinsic(instr);
-
-   const unsigned comp_shift = mod;
-   const unsigned mask = nir_intrinsic_write_mask(instr) << comp_shift;
-
-   emit_urb_indirect_vec4_write(bld, offset_src, base_in_dwords, src,
-                                urb_handle, comp_shift, comps, mask);
-}
-
-static void
-emit_urb_indirect_writes_xe2(const brw_builder &bld, nir_intrinsic_instr *instr,
-                             const brw_reg &src, const brw_reg &offset_src,
-                             brw_reg urb_handle)
-{
-   assert(nir_src_bit_size(instr->src[0]) == 32);
-
-   const struct intel_device_info *devinfo = bld.shader->devinfo;
-   const unsigned runit = reg_unit(devinfo);
-   const unsigned write_size = 8 * runit;
-
-   const unsigned comps = nir_src_num_components(instr->src[0]);
-   assert(comps <= 4);
-
-   const unsigned base_in_dwords = nir_intrinsic_base(instr) +
-                                   component_from_intrinsic(instr);
-
-   if (base_in_dwords > 0) {
-      brw_builder bldall = bld.group(write_size, 0).exec_all();
-      urb_handle = bldall.ADD(urb_handle, brw_imm_ud(base_in_dwords * 4));
-   }
-
-   const unsigned mask = nir_intrinsic_write_mask(instr);
-
-   for (unsigned q = 0; q < bld.dispatch_width() / write_size; q++) {
-      brw_builder wbld = bld.group(write_size, q);
-
-      brw_reg payload_srcs[4];
-
-      for (unsigned c = 0; c < comps; c++)
-         payload_srcs[c] = horiz_offset(offset(src, bld, c), write_size * q);
-
-      brw_reg addr =
-         wbld.ADD(wbld.SHL(retype(horiz_offset(offset_src, write_size * q),
-                                  BRW_TYPE_UD),
-                           brw_imm_ud(2)), urb_handle);
-
-      brw_reg srcs[URB_LOGICAL_NUM_SRCS];
-      srcs[URB_LOGICAL_SRC_HANDLE] = addr;
-      srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask);
-      srcs[URB_LOGICAL_SRC_DATA] =
-         retype(brw_allocate_vgrf_units(*bld.shader, comps * runit), BRW_TYPE_F);
-      wbld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, comps, 0);
-
-      brw_urb_inst *urb = wbld.URB_WRITE(srcs, ARRAY_SIZE(srcs));
-      urb->components = comps;
-   }
-}
-
-static void
-emit_urb_indirect_writes(const brw_builder &bld, nir_intrinsic_instr *instr,
-                         const brw_reg &src, const brw_reg &offset_src,
-                         brw_reg urb_handle)
-{
-   assert(bld.shader->devinfo->ver < 20);
-   assert(nir_src_bit_size(instr->src[0]) == 32);
-
-   const unsigned comps = nir_src_num_components(instr->src[0]);
-   assert(comps <= 4);
-
-   const unsigned base_in_dwords = nir_intrinsic_base(instr) +
-                                   component_from_intrinsic(instr);
-
-   /* Use URB write message that allow different offsets per-slot.  The offset
-    * is in units of vec4s (128 bits), so we use a write for each component,
-    * replicating it in the sources and applying the appropriate mask based on
-    * the dword offset.
-    */
-
-   for (unsigned c = 0; c < comps; c++) {
-      if (((1 << c) & nir_intrinsic_write_mask(instr)) == 0)
-         continue;
-
-      brw_reg src_comp = offset(src, bld, c);
-
-      for (unsigned q = 0; q < bld.dispatch_width() / 8; q++) {
-         brw_builder bld8 = bld.group(8, q);
-
-         /* offset is always positive, so signedness doesn't matter */
-         assert(offset_src.type == BRW_TYPE_D ||
-                offset_src.type == BRW_TYPE_UD);
-
-         brw_reg off =
-            bld8.ADD(quarter(retype(offset_src, BRW_TYPE_UD), q),
-                     brw_imm_ud(c + base_in_dwords));
-         brw_reg m = bld8.AND(off, brw_imm_ud(0x3));
-         brw_reg mask = bld8.SHL(bld8.MOV(brw_imm_ud(1)), m);
-         brw_reg final_offset = bld8.SHR(off, brw_imm_ud(2));
-
-         brw_reg payload_srcs[4];
-         unsigned length = 0;
-
-         for (unsigned j = 0; j < 4; j++)
-            payload_srcs[length++] = quarter(src_comp, q);
-
-         brw_reg srcs[URB_LOGICAL_NUM_SRCS];
-         srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
-         srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = final_offset;
-         srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = mask;
-         srcs[URB_LOGICAL_SRC_DATA] =
-            retype(brw_allocate_vgrf_units(*bld.shader, length), BRW_TYPE_F);
-         bld8.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0);
-
-         brw_urb_inst *urb = bld8.URB_WRITE(srcs, ARRAY_SIZE(srcs));
-         urb->components = length;
-      }
-   }
-}
-
-static void
-emit_urb_direct_reads(const brw_builder &bld, nir_intrinsic_instr *instr,
-                      const brw_reg &dest, brw_reg urb_handle)
-{
-   assert(bld.shader->devinfo->ver < 20);
-   assert(instr->def.bit_size == 32);
-
-   unsigned comps = instr->def.num_components;
-   if (comps == 0)
-      return;
-
-   nir_src *offset_nir_src = nir_get_io_offset_src(instr);
-   assert(nir_src_is_const(*offset_nir_src));
-
-   const unsigned offset_in_dwords = nir_intrinsic_base(instr) +
-                                     nir_src_as_uint(*offset_nir_src) +
-                                     component_from_intrinsic(instr);
-
-   unsigned urb_global_offset = offset_in_dwords / 4;
-   adjust_handle_and_offset(bld, urb_handle, urb_global_offset);
-
-   const unsigned comp_offset = offset_in_dwords % 4;
-   const unsigned num_regs = comp_offset + comps;
-
-   brw_builder ubld8 = bld.group(8, 0).exec_all();
-   brw_reg data = ubld8.vgrf(BRW_TYPE_UD, num_regs);
-   brw_reg srcs[URB_LOGICAL_NUM_SRCS];
-   srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
-
-   brw_urb_inst *urb = ubld8.URB_READ(data, srcs, ARRAY_SIZE(srcs));
-   urb->offset = urb_global_offset;
-   assert(urb->offset < 2048);
-   urb->size_written = num_regs * REG_SIZE;
-
-   for (unsigned c = 0; c < comps; c++) {
-      brw_reg dest_comp = offset(dest, bld, c);
-      brw_reg data_comp = horiz_stride(offset(data, ubld8, comp_offset + c), 0);
-      bld.MOV(retype(dest_comp, BRW_TYPE_UD), data_comp);
-   }
-}
-
-static void
-emit_urb_direct_reads_xe2(const brw_builder &bld, nir_intrinsic_instr *instr,
-                          const brw_reg &dest, brw_reg urb_handle)
-{
-   assert(instr->def.bit_size == 32);
-
-   unsigned comps = instr->def.num_components;
-   if (comps == 0)
-      return;
-
-   nir_src *offset_nir_src = nir_get_io_offset_src(instr);
-   assert(nir_src_is_const(*offset_nir_src));
-
-   brw_builder ubld16 = bld.group(16, 0).exec_all();
-
-   const unsigned offset_in_dwords = nir_intrinsic_base(instr) +
-                                     nir_src_as_uint(*offset_nir_src) +
-                                     component_from_intrinsic(instr);
-
-   if (offset_in_dwords > 0)
-      urb_handle = ubld16.ADD(urb_handle, brw_imm_ud(offset_in_dwords * 4));
-
-   brw_reg data = ubld16.vgrf(BRW_TYPE_UD, comps);
-   brw_reg srcs[URB_LOGICAL_NUM_SRCS];
-   srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
-
-   brw_inst *inst = ubld16.URB_READ(data, srcs, ARRAY_SIZE(srcs));
-   inst->size_written = 2 * comps * REG_SIZE;
-
-   for (unsigned c = 0; c < comps; c++) {
-      brw_reg dest_comp = offset(dest, bld, c);
-      brw_reg data_comp = horiz_stride(offset(data, ubld16, c), 0);
-      bld.MOV(retype(dest_comp, BRW_TYPE_UD), data_comp);
-   }
-}
-
-static void
-emit_urb_indirect_reads(const brw_builder &bld, nir_intrinsic_instr *instr,
-                        const brw_reg &dest, const brw_reg &offset_src, brw_reg urb_handle)
-{
-   assert(instr->def.bit_size == 32);
-
-   unsigned comps = instr->def.num_components;
-   if (comps == 0)
-      return;
-
-   brw_reg seq_ud;
-   {
-      brw_builder ubld8 = bld.group(8, 0).exec_all();
-      seq_ud = ubld8.vgrf(BRW_TYPE_UD, 1);
-      brw_reg seq_uw = ubld8.vgrf(BRW_TYPE_UW, 1);
-      ubld8.MOV(seq_uw, brw_reg(brw_imm_v(0x76543210)));
-      ubld8.MOV(seq_ud, seq_uw);
-      seq_ud = ubld8.SHL(seq_ud, brw_imm_ud(2));
-   }
-
-   const unsigned base_in_dwords = nir_intrinsic_base(instr) +
-                                   component_from_intrinsic(instr);
-
-   for (unsigned c = 0; c < comps; c++) {
-      for (unsigned q = 0; q < bld.dispatch_width() / 8; q++) {
-         brw_builder bld8 = bld.group(8, q);
-
-         /* offset is always positive, so signedness doesn't matter */
-         assert(offset_src.type == BRW_TYPE_D ||
-                offset_src.type == BRW_TYPE_UD);
-         brw_reg off =
-            bld8.ADD(bld8.MOV(quarter(retype(offset_src, BRW_TYPE_UD), q)),
-                     brw_imm_ud(base_in_dwords + c));
-
-         STATIC_ASSERT(IS_POT(REG_SIZE) && REG_SIZE > 1);
-
-         brw_reg comp;
-         comp = bld8.AND(off, brw_imm_ud(0x3));
-         comp = bld8.SHL(comp, brw_imm_ud(ffs(REG_SIZE) - 1));
-         comp = bld8.ADD(comp, seq_ud);
-
-         off = bld8.SHR(off, brw_imm_ud(2));
-
-         brw_reg srcs[URB_LOGICAL_NUM_SRCS];
-         srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
-         srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = off;
-
-         brw_reg data = bld8.vgrf(BRW_TYPE_UD, 4);
-
-         brw_urb_inst *urb = bld8.URB_READ(data, srcs, ARRAY_SIZE(srcs));
-         urb->size_written = 4 * REG_SIZE;
-
-         brw_reg dest_comp = offset(dest, bld, c);
-         bld8.emit(SHADER_OPCODE_MOV_INDIRECT,
-                   retype(quarter(dest_comp, q), BRW_TYPE_UD),
-                   data,
-                   comp,
-                   brw_imm_ud(4 * REG_SIZE));
-      }
-   }
-}
-
-static void
-emit_urb_indirect_reads_xe2(const brw_builder &bld, nir_intrinsic_instr *instr,
-                            const brw_reg &dest, const brw_reg &offset_src,
-                            brw_reg urb_handle)
-{
-   assert(instr->def.bit_size == 32);
-
-   unsigned comps = instr->def.num_components;
-   if (comps == 0)
-      return;
-
-   brw_builder ubld16 = bld.group(16, 0).exec_all();
-
-   const unsigned offset_in_dwords = nir_intrinsic_base(instr) +
-                                     component_from_intrinsic(instr);
-
-   if (offset_in_dwords > 0)
-      urb_handle = ubld16.ADD(urb_handle, brw_imm_ud(offset_in_dwords * 4));
-
-   brw_reg data = ubld16.vgrf(BRW_TYPE_UD, comps);
-
-   for (unsigned q = 0; q < bld.dispatch_width() / 16; q++) {
-      brw_builder wbld = bld.group(16, q);
-
-      brw_reg addr = wbld.SHL(retype(horiz_offset(offset_src, 16 * q),
-                                     BRW_TYPE_UD),
-                              brw_imm_ud(2));
-
-      brw_reg srcs[URB_LOGICAL_NUM_SRCS];
-      srcs[URB_LOGICAL_SRC_HANDLE] = wbld.ADD(addr, urb_handle);
-
-      brw_inst *inst = wbld.URB_READ(data, srcs, ARRAY_SIZE(srcs));
-      inst->size_written = 2 * comps * REG_SIZE;
-
-      for (unsigned c = 0; c < comps; c++) {
-         brw_reg dest_comp = horiz_offset(offset(dest, bld, c), 16 * q);
-         brw_reg data_comp = offset(data, wbld, c);
-         wbld.MOV(retype(dest_comp, BRW_TYPE_UD), data_comp);
-      }
-   }
-}
-
-static void
-emit_task_mesh_store(nir_to_brw_state &ntb,
-                     const brw_builder &bld, nir_intrinsic_instr *instr,
-                     const brw_reg &urb_handle)
-{
-   brw_reg src = get_nir_src(ntb, instr->src[0], -1);
-   nir_src *offset_nir_src = nir_get_io_offset_src(instr);
-
-   if (nir_src_is_const(*offset_nir_src)) {
-      if (bld.shader->devinfo->ver >= 20)
-         emit_urb_direct_writes_xe2(bld, instr, src, urb_handle);
-      else
-         emit_urb_direct_writes(bld, instr, src, urb_handle);
-   } else {
-      if (bld.shader->devinfo->ver >= 20) {
-         emit_urb_indirect_writes_xe2(bld, instr, src,
-                                      get_nir_src(ntb, *offset_nir_src, 0),
-                                      urb_handle);
-         return;
-      }
-      bool use_mod = false;
-      unsigned mod;
-
-      /* Try to calculate the value of (offset + base) % 4. If we can do
-       * this, then we can do indirect writes using only 1 URB write.
-       */
-      use_mod = nir_mod_analysis(nir_get_scalar(offset_nir_src->ssa, 0), nir_type_uint, 4, &mod);
-      if (use_mod) {
-         mod += nir_intrinsic_base(instr) + component_from_intrinsic(instr);
-         mod %= 4;
-      }
-
-      if (use_mod) {
-         emit_urb_indirect_writes_mod(bld, instr, src,
-                                      get_nir_src(ntb, *offset_nir_src, 0),
-                                      urb_handle, mod);
-      } else {
-         emit_urb_indirect_writes(bld, instr, src,
-                                  get_nir_src(ntb, *offset_nir_src, 0),
-                                  urb_handle);
-      }
-   }
-}
-
-static void
-emit_task_mesh_load(nir_to_brw_state &ntb,
-                    const brw_builder &bld, nir_intrinsic_instr *instr,
-                    const brw_reg &urb_handle)
-{
-   brw_reg dest = get_nir_def(ntb, instr->def);
-   nir_src *offset_nir_src = nir_get_io_offset_src(instr);
-
-   /* TODO(mesh): for per_vertex and per_primitive, if we could keep around
-    * the non-array-index offset, we could use to decide if we can perform
-    * a single large aligned read instead one per component.
-    */
-
-   if (nir_src_is_const(*offset_nir_src)) {
-      if (bld.shader->devinfo->ver >= 20)
-         emit_urb_direct_reads_xe2(bld, instr, dest, urb_handle);
-      else
-         emit_urb_direct_reads(bld, instr, dest, urb_handle);
-   } else {
-      if (bld.shader->devinfo->ver >= 20)
-         emit_urb_indirect_reads_xe2(bld, instr, dest,
-                                     get_nir_src(ntb, *offset_nir_src, 0),
-                                     urb_handle);
-      else
-         emit_urb_indirect_reads(bld, instr, dest,
-                                 get_nir_src(ntb, *offset_nir_src, 0),
-                                 urb_handle);
-   }
-}
-
-static void
-brw_from_nir_emit_task_mesh_intrinsic(nir_to_brw_state &ntb, const brw_builder &bld,
-                                nir_intrinsic_instr *instr)
+brw_from_nir_emit_task_mesh_intrinsic(nir_to_brw_state &ntb,
+                                      nir_intrinsic_instr *instr)
 {
+   brw_builder &bld = ntb.bld;
    brw_shader &s = ntb.s;
 
    assert(s.stage == MESA_SHADER_MESH || s.stage == MESA_SHADER_TASK);
@@ -5466,6 +4904,11 @@ brw_from_nir_emit_task_mesh_intrinsic(nir_to_brw_state &ntb, const brw_builder &
       dest = get_nir_def(ntb, instr->def);
 
    switch (instr->intrinsic) {
+   case nir_intrinsic_load_urb_input_handle_intel:
+      assert(s.stage == MESA_SHADER_MESH);
+      bld.MOV(retype(dest, BRW_TYPE_UD), payload.task_urb_input);
+      break;
+
    case nir_intrinsic_load_urb_output_handle_intel:
       bld.MOV(retype(dest, BRW_TYPE_UD), payload.urb_output);
       break;
@@ -5502,73 +4945,6 @@ brw_from_nir_emit_task_mesh_intrinsic(nir_to_brw_state &ntb, const brw_builder &
    }
 }
 
-static void
-brw_from_nir_emit_task_intrinsic(nir_to_brw_state &ntb,
-                           nir_intrinsic_instr *instr)
-{
-   const brw_builder &bld = ntb.bld;
-   brw_shader &s = ntb.s;
-
-   assert(s.stage == MESA_SHADER_TASK);
-   const brw_task_mesh_thread_payload &payload = s.task_mesh_payload();
-
-   switch (instr->intrinsic) {
-   case nir_intrinsic_store_output:
-   case nir_intrinsic_store_task_payload:
-      emit_task_mesh_store(ntb, bld, instr, payload.urb_output);
-      break;
-
-   case nir_intrinsic_load_output:
-   case nir_intrinsic_load_task_payload:
-      emit_task_mesh_load(ntb, bld, instr, payload.urb_output);
-      break;
-
-   default:
-      brw_from_nir_emit_task_mesh_intrinsic(ntb, bld, instr);
-      break;
-   }
-}
-
-static void
-brw_from_nir_emit_mesh_intrinsic(nir_to_brw_state &ntb,
-                           nir_intrinsic_instr *instr)
-{
-   const brw_builder &bld = ntb.bld;
-   brw_shader &s = ntb.s;
-
-   assert(s.stage == MESA_SHADER_MESH);
-   const brw_task_mesh_thread_payload &payload = s.task_mesh_payload();
-
-   brw_reg dest;
-   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
-      dest = get_nir_def(ntb, instr->def);
-
-   switch (instr->intrinsic) {
-   case nir_intrinsic_load_urb_input_handle_intel:
-      bld.MOV(retype(dest, BRW_TYPE_UD), payload.task_urb_input);
-      break;
-   case nir_intrinsic_store_per_primitive_output:
-   case nir_intrinsic_store_per_vertex_output:
-   case nir_intrinsic_store_output:
-      emit_task_mesh_store(ntb, bld, instr, payload.urb_output);
-      break;
-
-   case nir_intrinsic_load_per_vertex_output:
-   case nir_intrinsic_load_per_primitive_output:
-   case nir_intrinsic_load_output:
-      emit_task_mesh_load(ntb, bld, instr, payload.urb_output);
-      break;
-
-   case nir_intrinsic_load_task_payload:
-      emit_task_mesh_load(ntb, bld, instr, payload.task_urb_input);
-      break;
-
-   default:
-      brw_from_nir_emit_task_mesh_intrinsic(ntb, bld, instr);
-      break;
-   }
-}
-
 static void
 brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb,
                       const brw_builder &bld, nir_intrinsic_instr *instr)
@@ -7562,10 +6938,9 @@ brw_from_nir_emit_instr(nir_to_brw_state &ntb, nir_instr *instr)
          brw_from_nir_emit_bs_intrinsic(ntb, nir_instr_as_intrinsic(instr));
          break;
       case MESA_SHADER_TASK:
-         brw_from_nir_emit_task_intrinsic(ntb, nir_instr_as_intrinsic(instr));
-         break;
       case MESA_SHADER_MESH:
-         brw_from_nir_emit_mesh_intrinsic(ntb, nir_instr_as_intrinsic(instr));
+         brw_from_nir_emit_task_mesh_intrinsic(ntb,
+                                               nir_instr_as_intrinsic(instr));
          break;
       default:
          UNREACHABLE("unsupported shader stage");