From d831f38d1139c0fd612c371629e47549eab756d1 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 8 Dec 2025 01:00:57 -0800 Subject: [PATCH] brw: Delete all the old backend mesh/task URB handling code This has all been replaced by NIR lowering to URB intrinsics. Reviewed-by: Alyssa Rosenzweig Part-of: --- src/intel/compiler/brw/brw_from_nir.cpp | 645 +----------------------- 1 file changed, 10 insertions(+), 635 deletions(-) diff --git a/src/intel/compiler/brw/brw_from_nir.cpp b/src/intel/compiler/brw/brw_from_nir.cpp index 19a183aceb2..78a868021b9 100644 --- a/src/intel/compiler/brw/brw_from_nir.cpp +++ b/src/intel/compiler/brw/brw_from_nir.cpp @@ -4872,15 +4872,6 @@ get_timestamp(const brw_builder &bld) return dst; } -static unsigned -component_from_intrinsic(nir_intrinsic_instr *instr) -{ - if (nir_intrinsic_has_component(instr)) - return nir_intrinsic_component(instr); - else - return 0; -} - static void adjust_handle_and_offset(const brw_builder &bld, brw_reg &urb_handle, @@ -4899,563 +4890,10 @@ adjust_handle_and_offset(const brw_builder &bld, } static void -emit_urb_direct_vec4_write(const brw_builder &bld, - unsigned urb_global_offset, - const brw_reg &src, - brw_reg urb_handle, - unsigned dst_comp_offset, - unsigned comps, - unsigned mask) -{ - assert(bld.shader->devinfo->ver < 20); - - for (unsigned q = 0; q < bld.dispatch_width() / 8; q++) { - brw_builder bld8 = bld.group(8, q); - - brw_reg payload_srcs[8]; - unsigned length = 0; - - for (unsigned i = 0; i < dst_comp_offset; i++) - payload_srcs[length++] = reg_undef; - - for (unsigned c = 0; c < comps; c++) - payload_srcs[length++] = quarter(offset(src, bld, c), q); - - brw_reg srcs[URB_LOGICAL_NUM_SRCS]; - srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; - srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask); - srcs[URB_LOGICAL_SRC_DATA] = - retype(brw_allocate_vgrf_units(*bld.shader, length), BRW_TYPE_F); - bld8.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0); - - brw_urb_inst *urb = bld8.URB_WRITE(srcs, ARRAY_SIZE(srcs)); - urb->offset = urb_global_offset; - urb->components = length; - assert(urb->offset < 2048); - } -} - -static void -emit_urb_direct_writes(const brw_builder &bld, nir_intrinsic_instr *instr, - const brw_reg &src, brw_reg urb_handle) -{ - assert(bld.shader->devinfo->ver < 20); - assert(nir_src_bit_size(instr->src[0]) == 32); - - nir_src *offset_nir_src = nir_get_io_offset_src(instr); - assert(nir_src_is_const(*offset_nir_src)); - - const unsigned comps = nir_src_num_components(instr->src[0]); - assert(comps <= 4); - - const unsigned offset_in_dwords = nir_intrinsic_base(instr) + - nir_src_as_uint(*offset_nir_src) + - component_from_intrinsic(instr); - - /* URB writes are vec4 aligned but the intrinsic offsets are in dwords. - * We can write up to 8 dwords, so single vec4 write is enough. - */ - const unsigned comp_shift = offset_in_dwords % 4; - const unsigned mask = nir_intrinsic_write_mask(instr) << comp_shift; - - unsigned urb_global_offset = offset_in_dwords / 4; - adjust_handle_and_offset(bld, urb_handle, urb_global_offset); - - emit_urb_direct_vec4_write(bld, urb_global_offset, src, urb_handle, - comp_shift, comps, mask); -} - -static void -emit_urb_direct_vec4_write_xe2(const brw_builder &bld, - unsigned offset_in_bytes, - const brw_reg &src, - brw_reg urb_handle, - unsigned comps, - unsigned mask) -{ - const struct intel_device_info *devinfo = bld.shader->devinfo; - const unsigned runit = reg_unit(devinfo); - const unsigned write_size = 8 * runit; - - if (offset_in_bytes > 0) { - brw_builder bldall = bld.group(write_size, 0).exec_all(); - urb_handle = bldall.ADD(urb_handle, brw_imm_ud(offset_in_bytes)); - } - - for (unsigned q = 0; q < bld.dispatch_width() / write_size; q++) { - brw_builder hbld = bld.group(write_size, q); - - assert(comps <= 4); - brw_reg payload_srcs[4]; - - for (unsigned c = 0; c < comps; c++) - payload_srcs[c] = horiz_offset(offset(src, bld, c), write_size * q); - - brw_reg srcs[URB_LOGICAL_NUM_SRCS]; - srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; - srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask); - srcs[URB_LOGICAL_SRC_DATA] = - retype(brw_allocate_vgrf_units(*bld.shader, comps * runit), BRW_TYPE_F); - hbld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, comps, 0); - - brw_urb_inst *urb = hbld.URB_WRITE(srcs, ARRAY_SIZE(srcs)); - urb->components = comps; - } -} - -static void -emit_urb_direct_writes_xe2(const brw_builder &bld, nir_intrinsic_instr *instr, - const brw_reg &src, brw_reg urb_handle) -{ - assert(nir_src_bit_size(instr->src[0]) == 32); - - nir_src *offset_nir_src = nir_get_io_offset_src(instr); - assert(nir_src_is_const(*offset_nir_src)); - - const unsigned comps = nir_src_num_components(instr->src[0]); - assert(comps <= 4); - - const unsigned offset_in_dwords = nir_intrinsic_base(instr) + - nir_src_as_uint(*offset_nir_src) + - component_from_intrinsic(instr); - - const unsigned mask = nir_intrinsic_write_mask(instr); - - emit_urb_direct_vec4_write_xe2(bld, offset_in_dwords * 4, src, - urb_handle, comps, mask); -} - -static void -emit_urb_indirect_vec4_write(const brw_builder &bld, - const brw_reg &offset_src, - unsigned base, - const brw_reg &src, - brw_reg urb_handle, - unsigned dst_comp_offset, - unsigned comps, - unsigned mask) -{ - assert(bld.shader->devinfo->ver < 20); - - for (unsigned q = 0; q < bld.dispatch_width() / 8; q++) { - brw_builder bld8 = bld.group(8, q); - - /* offset is always positive, so signedness doesn't matter */ - assert(offset_src.type == BRW_TYPE_D || offset_src.type == BRW_TYPE_UD); - brw_reg qtr = bld8.MOV(quarter(retype(offset_src, BRW_TYPE_UD), q)); - brw_reg off = bld8.SHR(bld8.ADD(qtr, brw_imm_ud(base)), brw_imm_ud(2)); - - brw_reg payload_srcs[8]; - unsigned length = 0; - - for (unsigned i = 0; i < dst_comp_offset; i++) - payload_srcs[length++] = reg_undef; - - for (unsigned c = 0; c < comps; c++) - payload_srcs[length++] = quarter(offset(src, bld, c), q); - - brw_reg srcs[URB_LOGICAL_NUM_SRCS]; - srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; - srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = off; - srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask); - srcs[URB_LOGICAL_SRC_DATA] = - retype(brw_allocate_vgrf_units(*bld.shader, length), BRW_TYPE_F); - bld8.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0); - - brw_urb_inst *urb = bld8.URB_WRITE(srcs, ARRAY_SIZE(srcs)); - urb->components = length; - } -} - -static void -emit_urb_indirect_writes_mod(const brw_builder &bld, nir_intrinsic_instr *instr, - const brw_reg &src, const brw_reg &offset_src, - brw_reg urb_handle, unsigned mod) -{ - assert(bld.shader->devinfo->ver < 20); - assert(nir_src_bit_size(instr->src[0]) == 32); - - const unsigned comps = nir_src_num_components(instr->src[0]); - assert(comps <= 4); - - const unsigned base_in_dwords = nir_intrinsic_base(instr) + - component_from_intrinsic(instr); - - const unsigned comp_shift = mod; - const unsigned mask = nir_intrinsic_write_mask(instr) << comp_shift; - - emit_urb_indirect_vec4_write(bld, offset_src, base_in_dwords, src, - urb_handle, comp_shift, comps, mask); -} - -static void -emit_urb_indirect_writes_xe2(const brw_builder &bld, nir_intrinsic_instr *instr, - const brw_reg &src, const brw_reg &offset_src, - brw_reg urb_handle) -{ - assert(nir_src_bit_size(instr->src[0]) == 32); - - const struct intel_device_info *devinfo = bld.shader->devinfo; - const unsigned runit = reg_unit(devinfo); - const unsigned write_size = 8 * runit; - - const unsigned comps = nir_src_num_components(instr->src[0]); - assert(comps <= 4); - - const unsigned base_in_dwords = nir_intrinsic_base(instr) + - component_from_intrinsic(instr); - - if (base_in_dwords > 0) { - brw_builder bldall = bld.group(write_size, 0).exec_all(); - urb_handle = bldall.ADD(urb_handle, brw_imm_ud(base_in_dwords * 4)); - } - - const unsigned mask = nir_intrinsic_write_mask(instr); - - for (unsigned q = 0; q < bld.dispatch_width() / write_size; q++) { - brw_builder wbld = bld.group(write_size, q); - - brw_reg payload_srcs[4]; - - for (unsigned c = 0; c < comps; c++) - payload_srcs[c] = horiz_offset(offset(src, bld, c), write_size * q); - - brw_reg addr = - wbld.ADD(wbld.SHL(retype(horiz_offset(offset_src, write_size * q), - BRW_TYPE_UD), - brw_imm_ud(2)), urb_handle); - - brw_reg srcs[URB_LOGICAL_NUM_SRCS]; - srcs[URB_LOGICAL_SRC_HANDLE] = addr; - srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask); - srcs[URB_LOGICAL_SRC_DATA] = - retype(brw_allocate_vgrf_units(*bld.shader, comps * runit), BRW_TYPE_F); - wbld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, comps, 0); - - brw_urb_inst *urb = wbld.URB_WRITE(srcs, ARRAY_SIZE(srcs)); - urb->components = comps; - } -} - -static void -emit_urb_indirect_writes(const brw_builder &bld, nir_intrinsic_instr *instr, - const brw_reg &src, const brw_reg &offset_src, - brw_reg urb_handle) -{ - assert(bld.shader->devinfo->ver < 20); - assert(nir_src_bit_size(instr->src[0]) == 32); - - const unsigned comps = nir_src_num_components(instr->src[0]); - assert(comps <= 4); - - const unsigned base_in_dwords = nir_intrinsic_base(instr) + - component_from_intrinsic(instr); - - /* Use URB write message that allow different offsets per-slot. The offset - * is in units of vec4s (128 bits), so we use a write for each component, - * replicating it in the sources and applying the appropriate mask based on - * the dword offset. - */ - - for (unsigned c = 0; c < comps; c++) { - if (((1 << c) & nir_intrinsic_write_mask(instr)) == 0) - continue; - - brw_reg src_comp = offset(src, bld, c); - - for (unsigned q = 0; q < bld.dispatch_width() / 8; q++) { - brw_builder bld8 = bld.group(8, q); - - /* offset is always positive, so signedness doesn't matter */ - assert(offset_src.type == BRW_TYPE_D || - offset_src.type == BRW_TYPE_UD); - - brw_reg off = - bld8.ADD(quarter(retype(offset_src, BRW_TYPE_UD), q), - brw_imm_ud(c + base_in_dwords)); - brw_reg m = bld8.AND(off, brw_imm_ud(0x3)); - brw_reg mask = bld8.SHL(bld8.MOV(brw_imm_ud(1)), m); - brw_reg final_offset = bld8.SHR(off, brw_imm_ud(2)); - - brw_reg payload_srcs[4]; - unsigned length = 0; - - for (unsigned j = 0; j < 4; j++) - payload_srcs[length++] = quarter(src_comp, q); - - brw_reg srcs[URB_LOGICAL_NUM_SRCS]; - srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; - srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = final_offset; - srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = mask; - srcs[URB_LOGICAL_SRC_DATA] = - retype(brw_allocate_vgrf_units(*bld.shader, length), BRW_TYPE_F); - bld8.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0); - - brw_urb_inst *urb = bld8.URB_WRITE(srcs, ARRAY_SIZE(srcs)); - urb->components = length; - } - } -} - -static void -emit_urb_direct_reads(const brw_builder &bld, nir_intrinsic_instr *instr, - const brw_reg &dest, brw_reg urb_handle) -{ - assert(bld.shader->devinfo->ver < 20); - assert(instr->def.bit_size == 32); - - unsigned comps = instr->def.num_components; - if (comps == 0) - return; - - nir_src *offset_nir_src = nir_get_io_offset_src(instr); - assert(nir_src_is_const(*offset_nir_src)); - - const unsigned offset_in_dwords = nir_intrinsic_base(instr) + - nir_src_as_uint(*offset_nir_src) + - component_from_intrinsic(instr); - - unsigned urb_global_offset = offset_in_dwords / 4; - adjust_handle_and_offset(bld, urb_handle, urb_global_offset); - - const unsigned comp_offset = offset_in_dwords % 4; - const unsigned num_regs = comp_offset + comps; - - brw_builder ubld8 = bld.group(8, 0).exec_all(); - brw_reg data = ubld8.vgrf(BRW_TYPE_UD, num_regs); - brw_reg srcs[URB_LOGICAL_NUM_SRCS]; - srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; - - brw_urb_inst *urb = ubld8.URB_READ(data, srcs, ARRAY_SIZE(srcs)); - urb->offset = urb_global_offset; - assert(urb->offset < 2048); - urb->size_written = num_regs * REG_SIZE; - - for (unsigned c = 0; c < comps; c++) { - brw_reg dest_comp = offset(dest, bld, c); - brw_reg data_comp = horiz_stride(offset(data, ubld8, comp_offset + c), 0); - bld.MOV(retype(dest_comp, BRW_TYPE_UD), data_comp); - } -} - -static void -emit_urb_direct_reads_xe2(const brw_builder &bld, nir_intrinsic_instr *instr, - const brw_reg &dest, brw_reg urb_handle) -{ - assert(instr->def.bit_size == 32); - - unsigned comps = instr->def.num_components; - if (comps == 0) - return; - - nir_src *offset_nir_src = nir_get_io_offset_src(instr); - assert(nir_src_is_const(*offset_nir_src)); - - brw_builder ubld16 = bld.group(16, 0).exec_all(); - - const unsigned offset_in_dwords = nir_intrinsic_base(instr) + - nir_src_as_uint(*offset_nir_src) + - component_from_intrinsic(instr); - - if (offset_in_dwords > 0) - urb_handle = ubld16.ADD(urb_handle, brw_imm_ud(offset_in_dwords * 4)); - - brw_reg data = ubld16.vgrf(BRW_TYPE_UD, comps); - brw_reg srcs[URB_LOGICAL_NUM_SRCS]; - srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; - - brw_inst *inst = ubld16.URB_READ(data, srcs, ARRAY_SIZE(srcs)); - inst->size_written = 2 * comps * REG_SIZE; - - for (unsigned c = 0; c < comps; c++) { - brw_reg dest_comp = offset(dest, bld, c); - brw_reg data_comp = horiz_stride(offset(data, ubld16, c), 0); - bld.MOV(retype(dest_comp, BRW_TYPE_UD), data_comp); - } -} - -static void -emit_urb_indirect_reads(const brw_builder &bld, nir_intrinsic_instr *instr, - const brw_reg &dest, const brw_reg &offset_src, brw_reg urb_handle) -{ - assert(instr->def.bit_size == 32); - - unsigned comps = instr->def.num_components; - if (comps == 0) - return; - - brw_reg seq_ud; - { - brw_builder ubld8 = bld.group(8, 0).exec_all(); - seq_ud = ubld8.vgrf(BRW_TYPE_UD, 1); - brw_reg seq_uw = ubld8.vgrf(BRW_TYPE_UW, 1); - ubld8.MOV(seq_uw, brw_reg(brw_imm_v(0x76543210))); - ubld8.MOV(seq_ud, seq_uw); - seq_ud = ubld8.SHL(seq_ud, brw_imm_ud(2)); - } - - const unsigned base_in_dwords = nir_intrinsic_base(instr) + - component_from_intrinsic(instr); - - for (unsigned c = 0; c < comps; c++) { - for (unsigned q = 0; q < bld.dispatch_width() / 8; q++) { - brw_builder bld8 = bld.group(8, q); - - /* offset is always positive, so signedness doesn't matter */ - assert(offset_src.type == BRW_TYPE_D || - offset_src.type == BRW_TYPE_UD); - brw_reg off = - bld8.ADD(bld8.MOV(quarter(retype(offset_src, BRW_TYPE_UD), q)), - brw_imm_ud(base_in_dwords + c)); - - STATIC_ASSERT(IS_POT(REG_SIZE) && REG_SIZE > 1); - - brw_reg comp; - comp = bld8.AND(off, brw_imm_ud(0x3)); - comp = bld8.SHL(comp, brw_imm_ud(ffs(REG_SIZE) - 1)); - comp = bld8.ADD(comp, seq_ud); - - off = bld8.SHR(off, brw_imm_ud(2)); - - brw_reg srcs[URB_LOGICAL_NUM_SRCS]; - srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; - srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = off; - - brw_reg data = bld8.vgrf(BRW_TYPE_UD, 4); - - brw_urb_inst *urb = bld8.URB_READ(data, srcs, ARRAY_SIZE(srcs)); - urb->size_written = 4 * REG_SIZE; - - brw_reg dest_comp = offset(dest, bld, c); - bld8.emit(SHADER_OPCODE_MOV_INDIRECT, - retype(quarter(dest_comp, q), BRW_TYPE_UD), - data, - comp, - brw_imm_ud(4 * REG_SIZE)); - } - } -} - -static void -emit_urb_indirect_reads_xe2(const brw_builder &bld, nir_intrinsic_instr *instr, - const brw_reg &dest, const brw_reg &offset_src, - brw_reg urb_handle) -{ - assert(instr->def.bit_size == 32); - - unsigned comps = instr->def.num_components; - if (comps == 0) - return; - - brw_builder ubld16 = bld.group(16, 0).exec_all(); - - const unsigned offset_in_dwords = nir_intrinsic_base(instr) + - component_from_intrinsic(instr); - - if (offset_in_dwords > 0) - urb_handle = ubld16.ADD(urb_handle, brw_imm_ud(offset_in_dwords * 4)); - - brw_reg data = ubld16.vgrf(BRW_TYPE_UD, comps); - - for (unsigned q = 0; q < bld.dispatch_width() / 16; q++) { - brw_builder wbld = bld.group(16, q); - - brw_reg addr = wbld.SHL(retype(horiz_offset(offset_src, 16 * q), - BRW_TYPE_UD), - brw_imm_ud(2)); - - brw_reg srcs[URB_LOGICAL_NUM_SRCS]; - srcs[URB_LOGICAL_SRC_HANDLE] = wbld.ADD(addr, urb_handle); - - brw_inst *inst = wbld.URB_READ(data, srcs, ARRAY_SIZE(srcs)); - inst->size_written = 2 * comps * REG_SIZE; - - for (unsigned c = 0; c < comps; c++) { - brw_reg dest_comp = horiz_offset(offset(dest, bld, c), 16 * q); - brw_reg data_comp = offset(data, wbld, c); - wbld.MOV(retype(dest_comp, BRW_TYPE_UD), data_comp); - } - } -} - -static void -emit_task_mesh_store(nir_to_brw_state &ntb, - const brw_builder &bld, nir_intrinsic_instr *instr, - const brw_reg &urb_handle) -{ - brw_reg src = get_nir_src(ntb, instr->src[0], -1); - nir_src *offset_nir_src = nir_get_io_offset_src(instr); - - if (nir_src_is_const(*offset_nir_src)) { - if (bld.shader->devinfo->ver >= 20) - emit_urb_direct_writes_xe2(bld, instr, src, urb_handle); - else - emit_urb_direct_writes(bld, instr, src, urb_handle); - } else { - if (bld.shader->devinfo->ver >= 20) { - emit_urb_indirect_writes_xe2(bld, instr, src, - get_nir_src(ntb, *offset_nir_src, 0), - urb_handle); - return; - } - bool use_mod = false; - unsigned mod; - - /* Try to calculate the value of (offset + base) % 4. If we can do - * this, then we can do indirect writes using only 1 URB write. - */ - use_mod = nir_mod_analysis(nir_get_scalar(offset_nir_src->ssa, 0), nir_type_uint, 4, &mod); - if (use_mod) { - mod += nir_intrinsic_base(instr) + component_from_intrinsic(instr); - mod %= 4; - } - - if (use_mod) { - emit_urb_indirect_writes_mod(bld, instr, src, - get_nir_src(ntb, *offset_nir_src, 0), - urb_handle, mod); - } else { - emit_urb_indirect_writes(bld, instr, src, - get_nir_src(ntb, *offset_nir_src, 0), - urb_handle); - } - } -} - -static void -emit_task_mesh_load(nir_to_brw_state &ntb, - const brw_builder &bld, nir_intrinsic_instr *instr, - const brw_reg &urb_handle) -{ - brw_reg dest = get_nir_def(ntb, instr->def); - nir_src *offset_nir_src = nir_get_io_offset_src(instr); - - /* TODO(mesh): for per_vertex and per_primitive, if we could keep around - * the non-array-index offset, we could use to decide if we can perform - * a single large aligned read instead one per component. - */ - - if (nir_src_is_const(*offset_nir_src)) { - if (bld.shader->devinfo->ver >= 20) - emit_urb_direct_reads_xe2(bld, instr, dest, urb_handle); - else - emit_urb_direct_reads(bld, instr, dest, urb_handle); - } else { - if (bld.shader->devinfo->ver >= 20) - emit_urb_indirect_reads_xe2(bld, instr, dest, - get_nir_src(ntb, *offset_nir_src, 0), - urb_handle); - else - emit_urb_indirect_reads(bld, instr, dest, - get_nir_src(ntb, *offset_nir_src, 0), - urb_handle); - } -} - -static void -brw_from_nir_emit_task_mesh_intrinsic(nir_to_brw_state &ntb, const brw_builder &bld, - nir_intrinsic_instr *instr) +brw_from_nir_emit_task_mesh_intrinsic(nir_to_brw_state &ntb, + nir_intrinsic_instr *instr) { + brw_builder &bld = ntb.bld; brw_shader &s = ntb.s; assert(s.stage == MESA_SHADER_MESH || s.stage == MESA_SHADER_TASK); @@ -5466,6 +4904,11 @@ brw_from_nir_emit_task_mesh_intrinsic(nir_to_brw_state &ntb, const brw_builder & dest = get_nir_def(ntb, instr->def); switch (instr->intrinsic) { + case nir_intrinsic_load_urb_input_handle_intel: + assert(s.stage == MESA_SHADER_MESH); + bld.MOV(retype(dest, BRW_TYPE_UD), payload.task_urb_input); + break; + case nir_intrinsic_load_urb_output_handle_intel: bld.MOV(retype(dest, BRW_TYPE_UD), payload.urb_output); break; @@ -5502,73 +4945,6 @@ brw_from_nir_emit_task_mesh_intrinsic(nir_to_brw_state &ntb, const brw_builder & } } -static void -brw_from_nir_emit_task_intrinsic(nir_to_brw_state &ntb, - nir_intrinsic_instr *instr) -{ - const brw_builder &bld = ntb.bld; - brw_shader &s = ntb.s; - - assert(s.stage == MESA_SHADER_TASK); - const brw_task_mesh_thread_payload &payload = s.task_mesh_payload(); - - switch (instr->intrinsic) { - case nir_intrinsic_store_output: - case nir_intrinsic_store_task_payload: - emit_task_mesh_store(ntb, bld, instr, payload.urb_output); - break; - - case nir_intrinsic_load_output: - case nir_intrinsic_load_task_payload: - emit_task_mesh_load(ntb, bld, instr, payload.urb_output); - break; - - default: - brw_from_nir_emit_task_mesh_intrinsic(ntb, bld, instr); - break; - } -} - -static void -brw_from_nir_emit_mesh_intrinsic(nir_to_brw_state &ntb, - nir_intrinsic_instr *instr) -{ - const brw_builder &bld = ntb.bld; - brw_shader &s = ntb.s; - - assert(s.stage == MESA_SHADER_MESH); - const brw_task_mesh_thread_payload &payload = s.task_mesh_payload(); - - brw_reg dest; - if (nir_intrinsic_infos[instr->intrinsic].has_dest) - dest = get_nir_def(ntb, instr->def); - - switch (instr->intrinsic) { - case nir_intrinsic_load_urb_input_handle_intel: - bld.MOV(retype(dest, BRW_TYPE_UD), payload.task_urb_input); - break; - case nir_intrinsic_store_per_primitive_output: - case nir_intrinsic_store_per_vertex_output: - case nir_intrinsic_store_output: - emit_task_mesh_store(ntb, bld, instr, payload.urb_output); - break; - - case nir_intrinsic_load_per_vertex_output: - case nir_intrinsic_load_per_primitive_output: - case nir_intrinsic_load_output: - emit_task_mesh_load(ntb, bld, instr, payload.urb_output); - break; - - case nir_intrinsic_load_task_payload: - emit_task_mesh_load(ntb, bld, instr, payload.task_urb_input); - break; - - default: - brw_from_nir_emit_task_mesh_intrinsic(ntb, bld, instr); - break; - } -} - static void brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb, const brw_builder &bld, nir_intrinsic_instr *instr) @@ -7562,10 +6938,9 @@ brw_from_nir_emit_instr(nir_to_brw_state &ntb, nir_instr *instr) brw_from_nir_emit_bs_intrinsic(ntb, nir_instr_as_intrinsic(instr)); break; case MESA_SHADER_TASK: - brw_from_nir_emit_task_intrinsic(ntb, nir_instr_as_intrinsic(instr)); - break; case MESA_SHADER_MESH: - brw_from_nir_emit_mesh_intrinsic(ntb, nir_instr_as_intrinsic(instr)); + brw_from_nir_emit_task_mesh_intrinsic(ntb, + nir_instr_as_intrinsic(instr)); break; default: UNREACHABLE("unsupported shader stage");