brw: Delete all the old backend mesh/task URB handling code

This has all been replaced by NIR lowering to URB intrinsics.

Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38918>
This commit is contained in:
Kenneth Graunke
2025-12-08 01:00:57 -08:00
committed by Marge Bot
parent d0dc45955d
commit d831f38d11

View File

@@ -4872,15 +4872,6 @@ get_timestamp(const brw_builder &bld)
return dst;
}
/* Return the first-component (COMPONENT index) of an IO intrinsic, or 0
 * when the intrinsic has no COMPONENT index at all.
 */
static unsigned
component_from_intrinsic(nir_intrinsic_instr *instr)
{
   return nir_intrinsic_has_component(instr) ?
          nir_intrinsic_component(instr) : 0;
}
static void
adjust_handle_and_offset(const brw_builder &bld,
brw_reg &urb_handle,
@@ -4899,563 +4890,10 @@ adjust_handle_and_offset(const brw_builder &bld,
}
/* Emit a URB write of up to one vec4 on pre-Xe2 (ver < 20) hardware.
 *
 * urb_global_offset is in vec4 (128-bit) units.  dst_comp_offset is the
 * dword position of the first live component inside the destination
 * vec4, and mask is the channel mask already shifted to that position.
 * The write is issued once per SIMD8 quarter of the dispatch, since the
 * legacy URB message operates on groups of 8 lanes.
 */
static void
emit_urb_direct_vec4_write(const brw_builder &bld,
                           unsigned urb_global_offset,
                           const brw_reg &src,
                           brw_reg urb_handle,
                           unsigned dst_comp_offset,
                           unsigned comps,
                           unsigned mask)
{
   assert(bld.shader->devinfo->ver < 20);

   for (unsigned q = 0; q < bld.dispatch_width() / 8; q++) {
      brw_builder bld8 = bld.group(8, q);

      brw_reg payload_srcs[8];
      unsigned length = 0;

      /* Pad the payload with undefs up to the component offset; the
       * shifted channel mask keeps these slots from being written.
       */
      for (unsigned i = 0; i < dst_comp_offset; i++)
         payload_srcs[length++] = reg_undef;

      for (unsigned c = 0; c < comps; c++)
         payload_srcs[length++] = quarter(offset(src, bld, c), q);

      brw_reg srcs[URB_LOGICAL_NUM_SRCS];
      srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
      srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask);
      srcs[URB_LOGICAL_SRC_DATA] =
         retype(brw_allocate_vgrf_units(*bld.shader, length), BRW_TYPE_F);
      bld8.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0);

      brw_urb_inst *urb = bld8.URB_WRITE(srcs, ARRAY_SIZE(srcs));
      urb->offset = urb_global_offset;
      urb->components = length;
      /* The message can only encode global offsets below 2048 vec4s. */
      assert(urb->offset < 2048);
   }
}
/* Handle a constant-offset URB store intrinsic on pre-Xe2 hardware.
 *
 * Folds the intrinsic base, the constant offset source, and the
 * COMPONENT index into one dword offset, then emits a single
 * vec4-aligned write (sources are at most 4 dwords, so one write always
 * suffices).
 */
static void
emit_urb_direct_writes(const brw_builder &bld, nir_intrinsic_instr *instr,
                       const brw_reg &src, brw_reg urb_handle)
{
   assert(bld.shader->devinfo->ver < 20);
   assert(nir_src_bit_size(instr->src[0]) == 32);

   nir_src *offset_nir_src = nir_get_io_offset_src(instr);
   assert(nir_src_is_const(*offset_nir_src));

   const unsigned comps = nir_src_num_components(instr->src[0]);
   assert(comps <= 4);

   const unsigned offset_in_dwords = nir_intrinsic_base(instr) +
                                     nir_src_as_uint(*offset_nir_src) +
                                     component_from_intrinsic(instr);

   /* URB writes are vec4 aligned but the intrinsic offsets are in dwords.
    * We can write up to 8 dwords, so single vec4 write is enough.
    */
   const unsigned comp_shift = offset_in_dwords % 4;
   const unsigned mask = nir_intrinsic_write_mask(instr) << comp_shift;

   unsigned urb_global_offset = offset_in_dwords / 4;
   adjust_handle_and_offset(bld, urb_handle, urb_global_offset);

   emit_urb_direct_vec4_write(bld, urb_global_offset, src, urb_handle,
                              comp_shift, comps, mask);
}
/* Emit a URB write of up to one vec4 on Xe2+ hardware.
 *
 * offset_in_bytes is folded directly into the URB handle up front
 * (callers pass the dword offset scaled by 4).  Writes are issued per
 * native SIMD group of 8 lanes per register unit.
 */
static void
emit_urb_direct_vec4_write_xe2(const brw_builder &bld,
                               unsigned offset_in_bytes,
                               const brw_reg &src,
                               brw_reg urb_handle,
                               unsigned comps,
                               unsigned mask)
{
   const struct intel_device_info *devinfo = bld.shader->devinfo;
   const unsigned runit = reg_unit(devinfo);
   const unsigned write_size = 8 * runit;

   if (offset_in_bytes > 0) {
      brw_builder bldall = bld.group(write_size, 0).exec_all();
      urb_handle = bldall.ADD(urb_handle, brw_imm_ud(offset_in_bytes));
   }

   for (unsigned q = 0; q < bld.dispatch_width() / write_size; q++) {
      brw_builder hbld = bld.group(write_size, q);

      assert(comps <= 4);
      brw_reg payload_srcs[4];

      for (unsigned c = 0; c < comps; c++)
         payload_srcs[c] = horiz_offset(offset(src, bld, c), write_size * q);

      brw_reg srcs[URB_LOGICAL_NUM_SRCS];
      srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
      srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask);
      srcs[URB_LOGICAL_SRC_DATA] =
         retype(brw_allocate_vgrf_units(*bld.shader, comps * runit), BRW_TYPE_F);
      hbld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, comps, 0);

      brw_urb_inst *urb = hbld.URB_WRITE(srcs, ARRAY_SIZE(srcs));
      urb->components = comps;
   }
}
static void
emit_urb_direct_writes_xe2(const brw_builder &bld, nir_intrinsic_instr *instr,
const brw_reg &src, brw_reg urb_handle)
{
assert(nir_src_bit_size(instr->src[0]) == 32);
nir_src *offset_nir_src = nir_get_io_offset_src(instr);
assert(nir_src_is_const(*offset_nir_src));
const unsigned comps = nir_src_num_components(instr->src[0]);
assert(comps <= 4);
const unsigned offset_in_dwords = nir_intrinsic_base(instr) +
nir_src_as_uint(*offset_nir_src) +
component_from_intrinsic(instr);
const unsigned mask = nir_intrinsic_write_mask(instr);
emit_urb_direct_vec4_write_xe2(bld, offset_in_dwords * 4, src,
urb_handle, comps, mask);
}
/* Emit an indirect URB write on pre-Xe2 hardware using per-slot offsets.
 *
 * Each SIMD8 quarter computes its own vec4 offset as
 * (offset_src + base) / 4 and feeds it to the URB message's
 * PER_SLOT_OFFSETS source.  dst_comp_offset and mask must already
 * account for the (statically known) dword position within the vec4.
 */
static void
emit_urb_indirect_vec4_write(const brw_builder &bld,
                             const brw_reg &offset_src,
                             unsigned base,
                             const brw_reg &src,
                             brw_reg urb_handle,
                             unsigned dst_comp_offset,
                             unsigned comps,
                             unsigned mask)
{
   assert(bld.shader->devinfo->ver < 20);

   for (unsigned q = 0; q < bld.dispatch_width() / 8; q++) {
      brw_builder bld8 = bld.group(8, q);

      /* offset is always positive, so signedness doesn't matter */
      assert(offset_src.type == BRW_TYPE_D || offset_src.type == BRW_TYPE_UD);
      brw_reg qtr = bld8.MOV(quarter(retype(offset_src, BRW_TYPE_UD), q));
      /* Convert the per-lane dword offset into a vec4 (128-bit) offset. */
      brw_reg off = bld8.SHR(bld8.ADD(qtr, brw_imm_ud(base)), brw_imm_ud(2));

      brw_reg payload_srcs[8];
      unsigned length = 0;

      /* Pad the payload with undefs up to the component offset; the
       * shifted channel mask keeps these slots from being written.
       */
      for (unsigned i = 0; i < dst_comp_offset; i++)
         payload_srcs[length++] = reg_undef;

      for (unsigned c = 0; c < comps; c++)
         payload_srcs[length++] = quarter(offset(src, bld, c), q);

      brw_reg srcs[URB_LOGICAL_NUM_SRCS];
      srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
      srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = off;
      srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask);
      srcs[URB_LOGICAL_SRC_DATA] =
         retype(brw_allocate_vgrf_units(*bld.shader, length), BRW_TYPE_F);
      bld8.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0);

      brw_urb_inst *urb = bld8.URB_WRITE(srcs, ARRAY_SIZE(srcs));
      urb->components = length;
   }
}
/* Handle an indirect URB store on pre-Xe2 hardware when the value of
 * (offset + base) % 4 is known at compile time (mod).  Knowing the
 * alignment lets us use a single URB write with a pre-shifted channel
 * mask instead of one write per component.
 */
static void
emit_urb_indirect_writes_mod(const brw_builder &bld, nir_intrinsic_instr *instr,
                             const brw_reg &src, const brw_reg &offset_src,
                             brw_reg urb_handle, unsigned mod)
{
   assert(bld.shader->devinfo->ver < 20);
   assert(nir_src_bit_size(instr->src[0]) == 32);

   const unsigned num_comps = nir_src_num_components(instr->src[0]);
   assert(num_comps <= 4);

   const unsigned base_dwords =
      component_from_intrinsic(instr) + nir_intrinsic_base(instr);

   emit_urb_indirect_vec4_write(bld, offset_src, base_dwords, src,
                                urb_handle, mod, num_comps,
                                nir_intrinsic_write_mask(instr) << mod);
}
/* Handle an indirect URB store on Xe2+ hardware.
 *
 * The constant part of the offset (base + COMPONENT) is folded into the
 * handle once; the per-lane part is scaled from dwords to bytes and
 * added to the handle for each write group.
 */
static void
emit_urb_indirect_writes_xe2(const brw_builder &bld, nir_intrinsic_instr *instr,
                             const brw_reg &src, const brw_reg &offset_src,
                             brw_reg urb_handle)
{
   assert(nir_src_bit_size(instr->src[0]) == 32);

   const struct intel_device_info *devinfo = bld.shader->devinfo;
   const unsigned runit = reg_unit(devinfo);
   const unsigned write_size = 8 * runit;

   const unsigned comps = nir_src_num_components(instr->src[0]);
   assert(comps <= 4);

   const unsigned base_in_dwords = nir_intrinsic_base(instr) +
                                   component_from_intrinsic(instr);

   if (base_in_dwords > 0) {
      brw_builder bldall = bld.group(write_size, 0).exec_all();
      urb_handle = bldall.ADD(urb_handle, brw_imm_ud(base_in_dwords * 4));
   }

   const unsigned mask = nir_intrinsic_write_mask(instr);

   for (unsigned q = 0; q < bld.dispatch_width() / write_size; q++) {
      brw_builder wbld = bld.group(write_size, q);

      brw_reg payload_srcs[4];

      for (unsigned c = 0; c < comps; c++)
         payload_srcs[c] = horiz_offset(offset(src, bld, c), write_size * q);

      /* Per-lane address: handle + (offset_src << 2), i.e. the dword
       * offset converted to bytes.
       */
      brw_reg addr =
         wbld.ADD(wbld.SHL(retype(horiz_offset(offset_src, write_size * q),
                                  BRW_TYPE_UD),
                           brw_imm_ud(2)), urb_handle);

      brw_reg srcs[URB_LOGICAL_NUM_SRCS];
      srcs[URB_LOGICAL_SRC_HANDLE] = addr;
      srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask);
      srcs[URB_LOGICAL_SRC_DATA] =
         retype(brw_allocate_vgrf_units(*bld.shader, comps * runit), BRW_TYPE_F);
      wbld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, comps, 0);

      brw_urb_inst *urb = wbld.URB_WRITE(srcs, ARRAY_SIZE(srcs));
      urb->components = comps;
   }
}
/* Handle an indirect URB store on pre-Xe2 hardware when the dword
 * alignment of the offset is unknown: emit one per-slot-offset write
 * per enabled component, computing each lane's channel mask at runtime.
 */
static void
emit_urb_indirect_writes(const brw_builder &bld, nir_intrinsic_instr *instr,
                         const brw_reg &src, const brw_reg &offset_src,
                         brw_reg urb_handle)
{
   assert(bld.shader->devinfo->ver < 20);
   assert(nir_src_bit_size(instr->src[0]) == 32);

   const unsigned comps = nir_src_num_components(instr->src[0]);
   assert(comps <= 4);

   const unsigned base_in_dwords = nir_intrinsic_base(instr) +
                                   component_from_intrinsic(instr);

   /* Use URB write message that allow different offsets per-slot. The offset
    * is in units of vec4s (128 bits), so we use a write for each component,
    * replicating it in the sources and applying the appropriate mask based on
    * the dword offset.
    */
   for (unsigned c = 0; c < comps; c++) {
      /* Skip components masked off by the intrinsic's write mask. */
      if (((1 << c) & nir_intrinsic_write_mask(instr)) == 0)
         continue;

      brw_reg src_comp = offset(src, bld, c);

      for (unsigned q = 0; q < bld.dispatch_width() / 8; q++) {
         brw_builder bld8 = bld.group(8, q);

         /* offset is always positive, so signedness doesn't matter */
         assert(offset_src.type == BRW_TYPE_D ||
                offset_src.type == BRW_TYPE_UD);
         brw_reg off =
            bld8.ADD(quarter(retype(offset_src, BRW_TYPE_UD), q),
                     brw_imm_ud(c + base_in_dwords));

         /* Per-lane channel mask: 1 << (dword offset % 4). */
         brw_reg m = bld8.AND(off, brw_imm_ud(0x3));
         brw_reg mask = bld8.SHL(bld8.MOV(brw_imm_ud(1)), m);
         /* Per-lane vec4 offset for the PER_SLOT_OFFSETS source. */
         brw_reg final_offset = bld8.SHR(off, brw_imm_ud(2));

         /* Replicate the component into all four payload slots; the
          * runtime mask selects the right slot per lane.
          */
         brw_reg payload_srcs[4];
         unsigned length = 0;
         for (unsigned j = 0; j < 4; j++)
            payload_srcs[length++] = quarter(src_comp, q);

         brw_reg srcs[URB_LOGICAL_NUM_SRCS];
         srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
         srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = final_offset;
         srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = mask;
         srcs[URB_LOGICAL_SRC_DATA] =
            retype(brw_allocate_vgrf_units(*bld.shader, length), BRW_TYPE_F);
         bld8.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0);

         brw_urb_inst *urb = bld8.URB_WRITE(srcs, ARRAY_SIZE(srcs));
         urb->components = length;
      }
   }
}
/* Handle a constant-offset URB load on pre-Xe2 hardware.
 *
 * Reads whole vec4s starting at the aligned global offset with a single
 * SIMD8 exec-all message, then scatters each dword into the per-lane
 * destination with stride-0 (broadcast) MOVs.
 */
static void
emit_urb_direct_reads(const brw_builder &bld, nir_intrinsic_instr *instr,
                      const brw_reg &dest, brw_reg urb_handle)
{
   assert(bld.shader->devinfo->ver < 20);
   assert(instr->def.bit_size == 32);

   unsigned comps = instr->def.num_components;
   if (comps == 0)
      return;

   nir_src *offset_nir_src = nir_get_io_offset_src(instr);
   assert(nir_src_is_const(*offset_nir_src));

   const unsigned offset_in_dwords = nir_intrinsic_base(instr) +
                                     nir_src_as_uint(*offset_nir_src) +
                                     component_from_intrinsic(instr);

   unsigned urb_global_offset = offset_in_dwords / 4;
   adjust_handle_and_offset(bld, urb_handle, urb_global_offset);

   /* Read from the first dword of the containing vec4 onward. */
   const unsigned comp_offset = offset_in_dwords % 4;
   const unsigned num_regs = comp_offset + comps;

   brw_builder ubld8 = bld.group(8, 0).exec_all();
   brw_reg data = ubld8.vgrf(BRW_TYPE_UD, num_regs);

   brw_reg srcs[URB_LOGICAL_NUM_SRCS];
   srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;

   brw_urb_inst *urb = ubld8.URB_READ(data, srcs, ARRAY_SIZE(srcs));
   urb->offset = urb_global_offset;
   /* The message can only encode global offsets below 2048 vec4s. */
   assert(urb->offset < 2048);
   urb->size_written = num_regs * REG_SIZE;

   for (unsigned c = 0; c < comps; c++) {
      brw_reg dest_comp = offset(dest, bld, c);
      /* Broadcast one read dword across all lanes of the destination. */
      brw_reg data_comp = horiz_stride(offset(data, ubld8, comp_offset + c), 0);
      bld.MOV(retype(dest_comp, BRW_TYPE_UD), data_comp);
   }
}
/* Handle a constant-offset URB load on Xe2+ hardware: fold the byte
 * offset into the handle, read all components with one SIMD16 exec-all
 * message, and broadcast each dword into the per-lane destination.
 */
static void
emit_urb_direct_reads_xe2(const brw_builder &bld, nir_intrinsic_instr *instr,
                          const brw_reg &dest, brw_reg urb_handle)
{
   assert(instr->def.bit_size == 32);

   unsigned comps = instr->def.num_components;
   if (comps == 0)
      return;

   nir_src *offset_nir_src = nir_get_io_offset_src(instr);
   assert(nir_src_is_const(*offset_nir_src));

   brw_builder ubld16 = bld.group(16, 0).exec_all();

   const unsigned offset_in_dwords = nir_intrinsic_base(instr) +
                                     nir_src_as_uint(*offset_nir_src) +
                                     component_from_intrinsic(instr);

   /* Dword offset scaled to bytes and folded into the handle. */
   if (offset_in_dwords > 0)
      urb_handle = ubld16.ADD(urb_handle, brw_imm_ud(offset_in_dwords * 4));

   brw_reg data = ubld16.vgrf(BRW_TYPE_UD, comps);

   brw_reg srcs[URB_LOGICAL_NUM_SRCS];
   srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;

   brw_inst *inst = ubld16.URB_READ(data, srcs, ARRAY_SIZE(srcs));
   inst->size_written = 2 * comps * REG_SIZE;

   for (unsigned c = 0; c < comps; c++) {
      brw_reg dest_comp = offset(dest, bld, c);
      /* Broadcast one read dword across all lanes of the destination. */
      brw_reg data_comp = horiz_stride(offset(data, ubld16, c), 0);
      bld.MOV(retype(dest_comp, BRW_TYPE_UD), data_comp);
   }
}
/* Handle an indirect URB load on pre-Xe2 hardware.
 *
 * Each SIMD8 quarter reads a full vec4 via per-slot offsets, then uses
 * MOV_INDIRECT with a per-lane byte address to pick out the dword
 * (offset % 4) that each lane actually wants.
 */
static void
emit_urb_indirect_reads(const brw_builder &bld, nir_intrinsic_instr *instr,
                        const brw_reg &dest, const brw_reg &offset_src, brw_reg urb_handle)
{
   assert(instr->def.bit_size == 32);

   unsigned comps = instr->def.num_components;
   if (comps == 0)
      return;

   /* Per-lane sequence 0..7 scaled by 4: each lane's dword position (in
    * bytes) within a register, used below by MOV_INDIRECT.
    */
   brw_reg seq_ud;
   {
      brw_builder ubld8 = bld.group(8, 0).exec_all();
      seq_ud = ubld8.vgrf(BRW_TYPE_UD, 1);
      brw_reg seq_uw = ubld8.vgrf(BRW_TYPE_UW, 1);
      ubld8.MOV(seq_uw, brw_reg(brw_imm_v(0x76543210)));
      ubld8.MOV(seq_ud, seq_uw);
      seq_ud = ubld8.SHL(seq_ud, brw_imm_ud(2));
   }

   const unsigned base_in_dwords = nir_intrinsic_base(instr) +
                                   component_from_intrinsic(instr);

   for (unsigned c = 0; c < comps; c++) {
      for (unsigned q = 0; q < bld.dispatch_width() / 8; q++) {
         brw_builder bld8 = bld.group(8, q);

         /* offset is always positive, so signedness doesn't matter */
         assert(offset_src.type == BRW_TYPE_D ||
                offset_src.type == BRW_TYPE_UD);
         brw_reg off =
            bld8.ADD(bld8.MOV(quarter(retype(offset_src, BRW_TYPE_UD), q)),
                     brw_imm_ud(base_in_dwords + c));

         STATIC_ASSERT(IS_POT(REG_SIZE) && REG_SIZE > 1);

         /* comp = (off % 4) * REG_SIZE + lane * 4: the byte address of
          * this lane's dword within the 4-register read result.
          */
         brw_reg comp;
         comp = bld8.AND(off, brw_imm_ud(0x3));
         comp = bld8.SHL(comp, brw_imm_ud(ffs(REG_SIZE) - 1));
         comp = bld8.ADD(comp, seq_ud);

         /* Per-slot offsets are in vec4 (128-bit) units. */
         off = bld8.SHR(off, brw_imm_ud(2));

         brw_reg srcs[URB_LOGICAL_NUM_SRCS];
         srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
         srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = off;

         brw_reg data = bld8.vgrf(BRW_TYPE_UD, 4);

         brw_urb_inst *urb = bld8.URB_READ(data, srcs, ARRAY_SIZE(srcs));
         urb->size_written = 4 * REG_SIZE;

         brw_reg dest_comp = offset(dest, bld, c);
         bld8.emit(SHADER_OPCODE_MOV_INDIRECT,
                   retype(quarter(dest_comp, q), BRW_TYPE_UD),
                   data,
                   comp,
                   brw_imm_ud(4 * REG_SIZE));
      }
   }
}
/* Handle an indirect URB load on Xe2+ hardware: fold the constant base
 * into the handle, then for each SIMD16 group read all the components
 * with a per-lane address of handle + (offset << 2).
 */
static void
emit_urb_indirect_reads_xe2(const brw_builder &bld, nir_intrinsic_instr *instr,
                            const brw_reg &dest, const brw_reg &offset_src,
                            brw_reg urb_handle)
{
   assert(instr->def.bit_size == 32);

   unsigned comps = instr->def.num_components;
   if (comps == 0)
      return;

   brw_builder ubld16 = bld.group(16, 0).exec_all();

   const unsigned offset_in_dwords = nir_intrinsic_base(instr) +
                                     component_from_intrinsic(instr);

   /* Constant dword offset scaled to bytes and folded into the handle. */
   if (offset_in_dwords > 0)
      urb_handle = ubld16.ADD(urb_handle, brw_imm_ud(offset_in_dwords * 4));

   brw_reg data = ubld16.vgrf(BRW_TYPE_UD, comps);

   for (unsigned q = 0; q < bld.dispatch_width() / 16; q++) {
      brw_builder wbld = bld.group(16, q);

      /* Per-lane dword offset scaled to bytes. */
      brw_reg addr = wbld.SHL(retype(horiz_offset(offset_src, 16 * q),
                                     BRW_TYPE_UD),
                              brw_imm_ud(2));

      brw_reg srcs[URB_LOGICAL_NUM_SRCS];
      srcs[URB_LOGICAL_SRC_HANDLE] = wbld.ADD(addr, urb_handle);

      brw_inst *inst = wbld.URB_READ(data, srcs, ARRAY_SIZE(srcs));
      inst->size_written = 2 * comps * REG_SIZE;

      for (unsigned c = 0; c < comps; c++) {
         brw_reg dest_comp = horiz_offset(offset(dest, bld, c), 16 * q);
         brw_reg data_comp = offset(data, wbld, c);
         wbld.MOV(retype(dest_comp, BRW_TYPE_UD), data_comp);
      }
   }
}
/* Store to the URB for task/mesh shaders, choosing the best code path:
 * direct writes for constant offsets; for indirect offsets on pre-Xe2
 * hardware, a single-write path when the offset's value modulo 4 can be
 * proven at compile time, otherwise the per-component fallback.
 */
static void
emit_task_mesh_store(nir_to_brw_state &ntb,
                     const brw_builder &bld, nir_intrinsic_instr *instr,
                     const brw_reg &urb_handle)
{
   brw_reg src = get_nir_src(ntb, instr->src[0], -1);
   nir_src *offset_nir_src = nir_get_io_offset_src(instr);

   if (nir_src_is_const(*offset_nir_src)) {
      if (bld.shader->devinfo->ver >= 20)
         emit_urb_direct_writes_xe2(bld, instr, src, urb_handle);
      else
         emit_urb_direct_writes(bld, instr, src, urb_handle);
   } else {
      if (bld.shader->devinfo->ver >= 20) {
         emit_urb_indirect_writes_xe2(bld, instr, src,
                                      get_nir_src(ntb, *offset_nir_src, 0),
                                      urb_handle);
         return;
      }
      bool use_mod = false;
      unsigned mod;

      /* Try to calculate the value of (offset + base) % 4. If we can do
       * this, then we can do indirect writes using only 1 URB write.
       */
      use_mod = nir_mod_analysis(nir_get_scalar(offset_nir_src->ssa, 0), nir_type_uint, 4, &mod);
      if (use_mod) {
         /* Fold the constant base/component into the known residue. */
         mod += nir_intrinsic_base(instr) + component_from_intrinsic(instr);
         mod %= 4;
      }

      if (use_mod) {
         emit_urb_indirect_writes_mod(bld, instr, src,
                                      get_nir_src(ntb, *offset_nir_src, 0),
                                      urb_handle, mod);
      } else {
         emit_urb_indirect_writes(bld, instr, src,
                                  get_nir_src(ntb, *offset_nir_src, 0),
                                  urb_handle);
      }
   }
}
/* Load from the URB for task/mesh shaders, dispatching to the direct
 * (constant-offset) or indirect helper appropriate for the current
 * hardware generation.
 */
static void
emit_task_mesh_load(nir_to_brw_state &ntb,
                    const brw_builder &bld, nir_intrinsic_instr *instr,
                    const brw_reg &urb_handle)
{
   brw_reg dest = get_nir_def(ntb, instr->def);
   nir_src *offset_nir_src = nir_get_io_offset_src(instr);
   const bool xe2 = bld.shader->devinfo->ver >= 20;

   /* TODO(mesh): for per_vertex and per_primitive, if we could keep around
    * the non-array-index offset, we could use to decide if we can perform
    * a single large aligned read instead one per component.
    */
   if (nir_src_is_const(*offset_nir_src)) {
      if (xe2)
         emit_urb_direct_reads_xe2(bld, instr, dest, urb_handle);
      else
         emit_urb_direct_reads(bld, instr, dest, urb_handle);
   } else {
      brw_reg indirect_off = get_nir_src(ntb, *offset_nir_src, 0);
      if (xe2)
         emit_urb_indirect_reads_xe2(bld, instr, dest, indirect_off,
                                     urb_handle);
      else
         emit_urb_indirect_reads(bld, instr, dest, indirect_off,
                                 urb_handle);
   }
}
static void
brw_from_nir_emit_task_mesh_intrinsic(nir_to_brw_state &ntb, const brw_builder &bld,
nir_intrinsic_instr *instr)
brw_from_nir_emit_task_mesh_intrinsic(nir_to_brw_state &ntb,
nir_intrinsic_instr *instr)
{
brw_builder &bld = ntb.bld;
brw_shader &s = ntb.s;
assert(s.stage == MESA_SHADER_MESH || s.stage == MESA_SHADER_TASK);
@@ -5466,6 +4904,11 @@ brw_from_nir_emit_task_mesh_intrinsic(nir_to_brw_state &ntb, const brw_builder &
dest = get_nir_def(ntb, instr->def);
switch (instr->intrinsic) {
case nir_intrinsic_load_urb_input_handle_intel:
assert(s.stage == MESA_SHADER_MESH);
bld.MOV(retype(dest, BRW_TYPE_UD), payload.task_urb_input);
break;
case nir_intrinsic_load_urb_output_handle_intel:
bld.MOV(retype(dest, BRW_TYPE_UD), payload.urb_output);
break;
@@ -5502,73 +4945,6 @@ brw_from_nir_emit_task_mesh_intrinsic(nir_to_brw_state &ntb, const brw_builder &
}
}
/* Translate one NIR intrinsic in a task shader.  Stores and loads of
 * the outputs / task payload go through the URB helpers; everything
 * else falls through to the shared task/mesh handler.
 */
static void
brw_from_nir_emit_task_intrinsic(nir_to_brw_state &ntb,
                                 nir_intrinsic_instr *instr)
{
   const brw_builder &bld = ntb.bld;
   brw_shader &s = ntb.s;
   assert(s.stage == MESA_SHADER_TASK);

   const brw_task_mesh_thread_payload &tm_payload = s.task_mesh_payload();

   switch (instr->intrinsic) {
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_task_payload:
      emit_task_mesh_store(ntb, bld, instr, tm_payload.urb_output);
      break;

   case nir_intrinsic_load_output:
   case nir_intrinsic_load_task_payload:
      emit_task_mesh_load(ntb, bld, instr, tm_payload.urb_output);
      break;

   default:
      brw_from_nir_emit_task_mesh_intrinsic(ntb, bld, instr);
      break;
   }
}
/* Translate one NIR intrinsic in a mesh shader.  Output stores/loads
 * use the shader's URB output handle, task-payload loads use the input
 * handle from the task stage, and everything else falls through to the
 * shared task/mesh handler.
 */
static void
brw_from_nir_emit_mesh_intrinsic(nir_to_brw_state &ntb,
                                 nir_intrinsic_instr *instr)
{
   const brw_builder &bld = ntb.bld;
   brw_shader &s = ntb.s;
   assert(s.stage == MESA_SHADER_MESH);

   const brw_task_mesh_thread_payload &tm_payload = s.task_mesh_payload();

   brw_reg dest;
   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
      dest = get_nir_def(ntb, instr->def);

   switch (instr->intrinsic) {
   case nir_intrinsic_load_urb_input_handle_intel:
      bld.MOV(retype(dest, BRW_TYPE_UD), tm_payload.task_urb_input);
      break;

   case nir_intrinsic_store_per_primitive_output:
   case nir_intrinsic_store_per_vertex_output:
   case nir_intrinsic_store_output:
      emit_task_mesh_store(ntb, bld, instr, tm_payload.urb_output);
      break;

   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_load_per_primitive_output:
   case nir_intrinsic_load_output:
      emit_task_mesh_load(ntb, bld, instr, tm_payload.urb_output);
      break;

   case nir_intrinsic_load_task_payload:
      emit_task_mesh_load(ntb, bld, instr, tm_payload.task_urb_input);
      break;

   default:
      brw_from_nir_emit_task_mesh_intrinsic(ntb, bld, instr);
      break;
   }
}
static void
brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb,
const brw_builder &bld, nir_intrinsic_instr *instr)
@@ -7562,10 +6938,9 @@ brw_from_nir_emit_instr(nir_to_brw_state &ntb, nir_instr *instr)
brw_from_nir_emit_bs_intrinsic(ntb, nir_instr_as_intrinsic(instr));
break;
case MESA_SHADER_TASK:
brw_from_nir_emit_task_intrinsic(ntb, nir_instr_as_intrinsic(instr));
break;
case MESA_SHADER_MESH:
brw_from_nir_emit_mesh_intrinsic(ntb, nir_instr_as_intrinsic(instr));
brw_from_nir_emit_task_mesh_intrinsic(ntb,
nir_instr_as_intrinsic(instr));
break;
default:
UNREACHABLE("unsupported shader stage");