brw: Delete all the old backend mesh/task URB handling code
This has all been replaced by NIR lowering to URB intrinsics. Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38918>
This commit is contained in:
committed by
Marge Bot
parent
d0dc45955d
commit
d831f38d11
@@ -4872,15 +4872,6 @@ get_timestamp(const brw_builder &bld)
|
||||
return dst;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
component_from_intrinsic(nir_intrinsic_instr *instr)
|
||||
{
|
||||
if (nir_intrinsic_has_component(instr))
|
||||
return nir_intrinsic_component(instr);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
adjust_handle_and_offset(const brw_builder &bld,
|
||||
brw_reg &urb_handle,
|
||||
@@ -4899,563 +4890,10 @@ adjust_handle_and_offset(const brw_builder &bld,
|
||||
}
|
||||
|
||||
/* Emit a URB write with a compile-time constant vec4 offset for pre-Xe2
 * hardware (devinfo->ver < 20).  The write is split into SIMD8 quarters;
 * each quarter builds a payload of `dst_comp_offset` undefined padding
 * components followed by `comps` source components, and emits a
 * URB_WRITE with `mask` as the channel mask and `urb_global_offset` as
 * the per-message offset (asserted to stay below 2048).
 */
static void
emit_urb_direct_vec4_write(const brw_builder &bld,
                           unsigned urb_global_offset,
                           const brw_reg &src,
                           brw_reg urb_handle,
                           unsigned dst_comp_offset,
                           unsigned comps,
                           unsigned mask)
{
   assert(bld.shader->devinfo->ver < 20);

   /* One SIMD8 message per quarter of the dispatch width. */
   for (unsigned q = 0; q < bld.dispatch_width() / 8; q++) {
      brw_builder bld8 = bld.group(8, q);

      brw_reg payload_srcs[8];
      unsigned length = 0;

      /* Pad the payload so the real data lands at the right dword slot
       * within the vec4; the channel mask keeps the pads from writing.
       */
      for (unsigned i = 0; i < dst_comp_offset; i++)
         payload_srcs[length++] = reg_undef;

      for (unsigned c = 0; c < comps; c++)
         payload_srcs[length++] = quarter(offset(src, bld, c), q);

      brw_reg srcs[URB_LOGICAL_NUM_SRCS];
      srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
      srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask);
      srcs[URB_LOGICAL_SRC_DATA] =
         retype(brw_allocate_vgrf_units(*bld.shader, length), BRW_TYPE_F);
      bld8.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0);

      brw_urb_inst *urb = bld8.URB_WRITE(srcs, ARRAY_SIZE(srcs));
      urb->offset = urb_global_offset;
      urb->components = length;
      assert(urb->offset < 2048);
   }
}
|
||||
|
||||
/* Handle a store intrinsic with a constant URB offset on pre-Xe2 hardware.
 * Computes the dword offset from the intrinsic base, the constant offset
 * source and the component index, converts it to a vec4-granular global
 * offset plus an intra-vec4 component shift, and emits a single vec4
 * write with the write mask shifted accordingly.
 */
static void
emit_urb_direct_writes(const brw_builder &bld, nir_intrinsic_instr *instr,
                       const brw_reg &src, brw_reg urb_handle)
{
   assert(bld.shader->devinfo->ver < 20);
   assert(nir_src_bit_size(instr->src[0]) == 32);

   nir_src *offset_nir_src = nir_get_io_offset_src(instr);
   assert(nir_src_is_const(*offset_nir_src));

   const unsigned comps = nir_src_num_components(instr->src[0]);
   assert(comps <= 4);

   const unsigned offset_in_dwords = nir_intrinsic_base(instr) +
                                     nir_src_as_uint(*offset_nir_src) +
                                     component_from_intrinsic(instr);

   /* URB writes are vec4 aligned but the intrinsic offsets are in dwords.
    * We can write up to 8 dwords, so single vec4 write is enough.
    */
   const unsigned comp_shift = offset_in_dwords % 4;
   const unsigned mask = nir_intrinsic_write_mask(instr) << comp_shift;

   unsigned urb_global_offset = offset_in_dwords / 4;
   adjust_handle_and_offset(bld, urb_handle, urb_global_offset);

   emit_urb_direct_vec4_write(bld, urb_global_offset, src, urb_handle,
                              comp_shift, comps, mask);
}
|
||||
|
||||
/* Emit a constant-offset URB write for Xe2+ hardware.  Unlike the pre-Xe2
 * path, the offset is applied in bytes by adding it to the URB handle
 * up front, and the write is split into groups of `8 * reg_unit(devinfo)`
 * channels rather than fixed SIMD8 quarters.
 */
static void
emit_urb_direct_vec4_write_xe2(const brw_builder &bld,
                               unsigned offset_in_bytes,
                               const brw_reg &src,
                               brw_reg urb_handle,
                               unsigned comps,
                               unsigned mask)
{
   const struct intel_device_info *devinfo = bld.shader->devinfo;
   const unsigned runit = reg_unit(devinfo);
   const unsigned write_size = 8 * runit;

   /* Fold the byte offset into the handle once, for all groups. */
   if (offset_in_bytes > 0) {
      brw_builder bldall = bld.group(write_size, 0).exec_all();
      urb_handle = bldall.ADD(urb_handle, brw_imm_ud(offset_in_bytes));
   }

   for (unsigned q = 0; q < bld.dispatch_width() / write_size; q++) {
      brw_builder hbld = bld.group(write_size, q);

      assert(comps <= 4);
      brw_reg payload_srcs[4];

      for (unsigned c = 0; c < comps; c++)
         payload_srcs[c] = horiz_offset(offset(src, bld, c), write_size * q);

      brw_reg srcs[URB_LOGICAL_NUM_SRCS];
      srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
      srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask);
      srcs[URB_LOGICAL_SRC_DATA] =
         retype(brw_allocate_vgrf_units(*bld.shader, comps * runit), BRW_TYPE_F);
      hbld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, comps, 0);

      brw_urb_inst *urb = hbld.URB_WRITE(srcs, ARRAY_SIZE(srcs));
      urb->components = comps;
   }
}
|
||||
|
||||
/* Handle a store intrinsic with a constant URB offset on Xe2+ hardware.
 * Computes the dword offset (base + constant offset source + component)
 * and forwards it as a byte offset (dwords * 4) to the vec4 write helper;
 * no intra-vec4 mask shifting is needed on this path.
 */
static void
emit_urb_direct_writes_xe2(const brw_builder &bld, nir_intrinsic_instr *instr,
                           const brw_reg &src, brw_reg urb_handle)
{
   assert(nir_src_bit_size(instr->src[0]) == 32);

   nir_src *offset_nir_src = nir_get_io_offset_src(instr);
   assert(nir_src_is_const(*offset_nir_src));

   const unsigned comps = nir_src_num_components(instr->src[0]);
   assert(comps <= 4);

   const unsigned offset_in_dwords = nir_intrinsic_base(instr) +
                                     nir_src_as_uint(*offset_nir_src) +
                                     component_from_intrinsic(instr);

   const unsigned mask = nir_intrinsic_write_mask(instr);

   emit_urb_direct_vec4_write_xe2(bld, offset_in_dwords * 4, src,
                                  urb_handle, comps, mask);
}
|
||||
|
||||
/* Emit a URB write with a per-channel (indirect) offset for pre-Xe2
 * hardware.  Per SIMD8 quarter, computes the per-slot vec4 offset as
 * (offset + base) >> 2 and emits a URB_WRITE carrying that per-slot
 * offset, with `dst_comp_offset` padding components ahead of the data
 * (caller has pre-shifted `mask` to match).
 */
static void
emit_urb_indirect_vec4_write(const brw_builder &bld,
                             const brw_reg &offset_src,
                             unsigned base,
                             const brw_reg &src,
                             brw_reg urb_handle,
                             unsigned dst_comp_offset,
                             unsigned comps,
                             unsigned mask)
{
   assert(bld.shader->devinfo->ver < 20);

   for (unsigned q = 0; q < bld.dispatch_width() / 8; q++) {
      brw_builder bld8 = bld.group(8, q);

      /* offset is always positive, so signedness doesn't matter */
      assert(offset_src.type == BRW_TYPE_D || offset_src.type == BRW_TYPE_UD);
      brw_reg qtr = bld8.MOV(quarter(retype(offset_src, BRW_TYPE_UD), q));
      /* Per-slot offsets are in vec4 units: (offset + base) / 4 dwords. */
      brw_reg off = bld8.SHR(bld8.ADD(qtr, brw_imm_ud(base)), brw_imm_ud(2));

      brw_reg payload_srcs[8];
      unsigned length = 0;

      for (unsigned i = 0; i < dst_comp_offset; i++)
         payload_srcs[length++] = reg_undef;

      for (unsigned c = 0; c < comps; c++)
         payload_srcs[length++] = quarter(offset(src, bld, c), q);

      brw_reg srcs[URB_LOGICAL_NUM_SRCS];
      srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
      srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = off;
      srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask);
      srcs[URB_LOGICAL_SRC_DATA] =
         retype(brw_allocate_vgrf_units(*bld.shader, length), BRW_TYPE_F);
      bld8.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0);

      brw_urb_inst *urb = bld8.URB_WRITE(srcs, ARRAY_SIZE(srcs));
      urb->components = length;
   }
}
|
||||
|
||||
/* Handle an indirect store on pre-Xe2 hardware when mod analysis proved
 * the value of (offset + base) % 4 is the constant `mod`.  That makes the
 * intra-vec4 placement static, so a single indirect vec4 write with a
 * pre-shifted channel mask suffices (instead of one write per component).
 */
static void
emit_urb_indirect_writes_mod(const brw_builder &bld, nir_intrinsic_instr *instr,
                             const brw_reg &src, const brw_reg &offset_src,
                             brw_reg urb_handle, unsigned mod)
{
   assert(bld.shader->devinfo->ver < 20);
   assert(nir_src_bit_size(instr->src[0]) == 32);

   const unsigned comps = nir_src_num_components(instr->src[0]);
   assert(comps <= 4);

   const unsigned base_in_dwords = nir_intrinsic_base(instr) +
                                   component_from_intrinsic(instr);

   const unsigned comp_shift = mod;
   const unsigned mask = nir_intrinsic_write_mask(instr) << comp_shift;

   emit_urb_indirect_vec4_write(bld, offset_src, base_in_dwords, src,
                                urb_handle, comp_shift, comps, mask);
}
|
||||
|
||||
/* Handle an indirect store on Xe2+ hardware.  The constant part of the
 * offset (base + component, in dwords) is folded into the handle as bytes
 * once; per group, the dynamic dword offset is scaled to bytes (<< 2) and
 * added per channel to form the message address.
 */
static void
emit_urb_indirect_writes_xe2(const brw_builder &bld, nir_intrinsic_instr *instr,
                             const brw_reg &src, const brw_reg &offset_src,
                             brw_reg urb_handle)
{
   assert(nir_src_bit_size(instr->src[0]) == 32);

   const struct intel_device_info *devinfo = bld.shader->devinfo;
   const unsigned runit = reg_unit(devinfo);
   const unsigned write_size = 8 * runit;

   const unsigned comps = nir_src_num_components(instr->src[0]);
   assert(comps <= 4);

   const unsigned base_in_dwords = nir_intrinsic_base(instr) +
                                   component_from_intrinsic(instr);

   /* Apply the constant part of the offset to the handle up front. */
   if (base_in_dwords > 0) {
      brw_builder bldall = bld.group(write_size, 0).exec_all();
      urb_handle = bldall.ADD(urb_handle, brw_imm_ud(base_in_dwords * 4));
   }

   const unsigned mask = nir_intrinsic_write_mask(instr);

   for (unsigned q = 0; q < bld.dispatch_width() / write_size; q++) {
      brw_builder wbld = bld.group(write_size, q);

      brw_reg payload_srcs[4];

      for (unsigned c = 0; c < comps; c++)
         payload_srcs[c] = horiz_offset(offset(src, bld, c), write_size * q);

      /* addr = handle + (dynamic dword offset * 4). */
      brw_reg addr =
         wbld.ADD(wbld.SHL(retype(horiz_offset(offset_src, write_size * q),
                                  BRW_TYPE_UD),
                           brw_imm_ud(2)), urb_handle);

      brw_reg srcs[URB_LOGICAL_NUM_SRCS];
      srcs[URB_LOGICAL_SRC_HANDLE] = addr;
      srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask);
      srcs[URB_LOGICAL_SRC_DATA] =
         retype(brw_allocate_vgrf_units(*bld.shader, comps * runit), BRW_TYPE_F);
      wbld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, comps, 0);

      brw_urb_inst *urb = wbld.URB_WRITE(srcs, ARRAY_SIZE(srcs));
      urb->components = comps;
   }
}
|
||||
|
||||
/* Fully general indirect store for pre-Xe2 hardware, used when the
 * intra-vec4 alignment of the offset is unknown at compile time.  Emits
 * one URB write per enabled component per SIMD8 quarter: the data is
 * replicated into all four payload slots and a runtime-computed channel
 * mask (1 << (off % 4)) selects the correct dword within the vec4.
 */
static void
emit_urb_indirect_writes(const brw_builder &bld, nir_intrinsic_instr *instr,
                         const brw_reg &src, const brw_reg &offset_src,
                         brw_reg urb_handle)
{
   assert(bld.shader->devinfo->ver < 20);
   assert(nir_src_bit_size(instr->src[0]) == 32);

   const unsigned comps = nir_src_num_components(instr->src[0]);
   assert(comps <= 4);

   const unsigned base_in_dwords = nir_intrinsic_base(instr) +
                                   component_from_intrinsic(instr);

   /* Use URB write message that allow different offsets per-slot. The offset
    * is in units of vec4s (128 bits), so we use a write for each component,
    * replicating it in the sources and applying the appropriate mask based on
    * the dword offset.
    */

   for (unsigned c = 0; c < comps; c++) {
      /* Skip components the intrinsic does not actually write. */
      if (((1 << c) & nir_intrinsic_write_mask(instr)) == 0)
         continue;

      brw_reg src_comp = offset(src, bld, c);

      for (unsigned q = 0; q < bld.dispatch_width() / 8; q++) {
         brw_builder bld8 = bld.group(8, q);

         /* offset is always positive, so signedness doesn't matter */
         assert(offset_src.type == BRW_TYPE_D ||
                offset_src.type == BRW_TYPE_UD);

         brw_reg off =
            bld8.ADD(quarter(retype(offset_src, BRW_TYPE_UD), q),
                     brw_imm_ud(c + base_in_dwords));
         /* Channel mask selects dword (off % 4) inside the vec4 slot. */
         brw_reg m = bld8.AND(off, brw_imm_ud(0x3));
         brw_reg mask = bld8.SHL(bld8.MOV(brw_imm_ud(1)), m);
         brw_reg final_offset = bld8.SHR(off, brw_imm_ud(2));

         brw_reg payload_srcs[4];
         unsigned length = 0;

         /* Replicate the component into every payload slot; the mask
          * picks the one that lands at the right dword.
          */
         for (unsigned j = 0; j < 4; j++)
            payload_srcs[length++] = quarter(src_comp, q);

         brw_reg srcs[URB_LOGICAL_NUM_SRCS];
         srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
         srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = final_offset;
         srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = mask;
         srcs[URB_LOGICAL_SRC_DATA] =
            retype(brw_allocate_vgrf_units(*bld.shader, length), BRW_TYPE_F);
         bld8.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0);

         brw_urb_inst *urb = bld8.URB_WRITE(srcs, ARRAY_SIZE(srcs));
         urb->components = length;
      }
   }
}
|
||||
|
||||
/* Handle a load intrinsic with a constant URB offset on pre-Xe2 hardware.
 * Performs one SIMD8 exec_all URB read covering the vec4-aligned span,
 * then broadcasts (stride 0) each read dword into the per-channel
 * destination components.
 */
static void
emit_urb_direct_reads(const brw_builder &bld, nir_intrinsic_instr *instr,
                      const brw_reg &dest, brw_reg urb_handle)
{
   assert(bld.shader->devinfo->ver < 20);
   assert(instr->def.bit_size == 32);

   unsigned comps = instr->def.num_components;
   if (comps == 0)
      return;

   nir_src *offset_nir_src = nir_get_io_offset_src(instr);
   assert(nir_src_is_const(*offset_nir_src));

   const unsigned offset_in_dwords = nir_intrinsic_base(instr) +
                                     nir_src_as_uint(*offset_nir_src) +
                                     component_from_intrinsic(instr);

   unsigned urb_global_offset = offset_in_dwords / 4;
   adjust_handle_and_offset(bld, urb_handle, urb_global_offset);

   /* Read must cover the leading misalignment plus the components. */
   const unsigned comp_offset = offset_in_dwords % 4;
   const unsigned num_regs = comp_offset + comps;

   brw_builder ubld8 = bld.group(8, 0).exec_all();
   brw_reg data = ubld8.vgrf(BRW_TYPE_UD, num_regs);
   brw_reg srcs[URB_LOGICAL_NUM_SRCS];
   srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;

   brw_urb_inst *urb = ubld8.URB_READ(data, srcs, ARRAY_SIZE(srcs));
   urb->offset = urb_global_offset;
   assert(urb->offset < 2048);
   urb->size_written = num_regs * REG_SIZE;

   /* Splat each read component (stride 0) across the full dispatch. */
   for (unsigned c = 0; c < comps; c++) {
      brw_reg dest_comp = offset(dest, bld, c);
      brw_reg data_comp = horiz_stride(offset(data, ubld8, comp_offset + c), 0);
      bld.MOV(retype(dest_comp, BRW_TYPE_UD), data_comp);
   }
}
|
||||
|
||||
/* Handle a load intrinsic with a constant URB offset on Xe2+ hardware.
 * The byte offset is added to the handle, a SIMD16 exec_all URB read
 * fetches the components, and each one is broadcast into the destination.
 */
static void
emit_urb_direct_reads_xe2(const brw_builder &bld, nir_intrinsic_instr *instr,
                          const brw_reg &dest, brw_reg urb_handle)
{
   assert(instr->def.bit_size == 32);

   unsigned comps = instr->def.num_components;
   if (comps == 0)
      return;

   nir_src *offset_nir_src = nir_get_io_offset_src(instr);
   assert(nir_src_is_const(*offset_nir_src));

   brw_builder ubld16 = bld.group(16, 0).exec_all();

   const unsigned offset_in_dwords = nir_intrinsic_base(instr) +
                                     nir_src_as_uint(*offset_nir_src) +
                                     component_from_intrinsic(instr);

   /* Offset is applied in bytes directly to the handle on Xe2+. */
   if (offset_in_dwords > 0)
      urb_handle = ubld16.ADD(urb_handle, brw_imm_ud(offset_in_dwords * 4));

   brw_reg data = ubld16.vgrf(BRW_TYPE_UD, comps);
   brw_reg srcs[URB_LOGICAL_NUM_SRCS];
   srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;

   brw_inst *inst = ubld16.URB_READ(data, srcs, ARRAY_SIZE(srcs));
   inst->size_written = 2 * comps * REG_SIZE;

   /* Broadcast (stride 0) each component to the full dispatch width. */
   for (unsigned c = 0; c < comps; c++) {
      brw_reg dest_comp = offset(dest, bld, c);
      brw_reg data_comp = horiz_stride(offset(data, ubld16, c), 0);
      bld.MOV(retype(dest_comp, BRW_TYPE_UD), data_comp);
   }
}
|
||||
|
||||
/* Indirect (per-channel offset) URB read for pre-Xe2 hardware.  Per
 * component and SIMD8 quarter: read a full vec4 at the per-slot offset
 * (off >> 2), then use MOV_INDIRECT with a per-channel byte index --
 * (off % 4) scaled by REG_SIZE plus a 0,4,...,28 channel sequence -- to
 * pick the correct dword out of the 4-register read result.
 */
static void
emit_urb_indirect_reads(const brw_builder &bld, nir_intrinsic_instr *instr,
                        const brw_reg &dest, const brw_reg &offset_src, brw_reg urb_handle)
{
   assert(instr->def.bit_size == 32);

   unsigned comps = instr->def.num_components;
   if (comps == 0)
      return;

   /* Build the per-channel byte sequence 0,4,8,...,28 once up front. */
   brw_reg seq_ud;
   {
      brw_builder ubld8 = bld.group(8, 0).exec_all();
      seq_ud = ubld8.vgrf(BRW_TYPE_UD, 1);
      brw_reg seq_uw = ubld8.vgrf(BRW_TYPE_UW, 1);
      ubld8.MOV(seq_uw, brw_reg(brw_imm_v(0x76543210)));
      ubld8.MOV(seq_ud, seq_uw);
      seq_ud = ubld8.SHL(seq_ud, brw_imm_ud(2));
   }

   const unsigned base_in_dwords = nir_intrinsic_base(instr) +
                                   component_from_intrinsic(instr);

   for (unsigned c = 0; c < comps; c++) {
      for (unsigned q = 0; q < bld.dispatch_width() / 8; q++) {
         brw_builder bld8 = bld.group(8, q);

         /* offset is always positive, so signedness doesn't matter */
         assert(offset_src.type == BRW_TYPE_D ||
                offset_src.type == BRW_TYPE_UD);
         brw_reg off =
            bld8.ADD(bld8.MOV(quarter(retype(offset_src, BRW_TYPE_UD), q)),
                     brw_imm_ud(base_in_dwords + c));

         STATIC_ASSERT(IS_POT(REG_SIZE) && REG_SIZE > 1);

         /* comp = (off % 4) * REG_SIZE + per-channel byte sequence:
          * byte index into the 4-register read result for MOV_INDIRECT.
          */
         brw_reg comp;
         comp = bld8.AND(off, brw_imm_ud(0x3));
         comp = bld8.SHL(comp, brw_imm_ud(ffs(REG_SIZE) - 1));
         comp = bld8.ADD(comp, seq_ud);

         /* Per-slot read offset is in vec4 units. */
         off = bld8.SHR(off, brw_imm_ud(2));

         brw_reg srcs[URB_LOGICAL_NUM_SRCS];
         srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
         srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = off;

         brw_reg data = bld8.vgrf(BRW_TYPE_UD, 4);

         brw_urb_inst *urb = bld8.URB_READ(data, srcs, ARRAY_SIZE(srcs));
         urb->size_written = 4 * REG_SIZE;

         brw_reg dest_comp = offset(dest, bld, c);
         bld8.emit(SHADER_OPCODE_MOV_INDIRECT,
                   retype(quarter(dest_comp, q), BRW_TYPE_UD),
                   data,
                   comp,
                   brw_imm_ud(4 * REG_SIZE));
      }
   }
}
|
||||
|
||||
/* Indirect URB read for Xe2+ hardware.  The constant part of the offset
 * is folded into the handle in bytes; per SIMD16 group, the dynamic dword
 * offset is scaled to bytes (<< 2) and added per channel to form the
 * message address, then the read components are copied out.
 */
static void
emit_urb_indirect_reads_xe2(const brw_builder &bld, nir_intrinsic_instr *instr,
                            const brw_reg &dest, const brw_reg &offset_src,
                            brw_reg urb_handle)
{
   assert(instr->def.bit_size == 32);

   unsigned comps = instr->def.num_components;
   if (comps == 0)
      return;

   brw_builder ubld16 = bld.group(16, 0).exec_all();

   const unsigned offset_in_dwords = nir_intrinsic_base(instr) +
                                     component_from_intrinsic(instr);

   /* Fold the constant dword offset into the handle as bytes. */
   if (offset_in_dwords > 0)
      urb_handle = ubld16.ADD(urb_handle, brw_imm_ud(offset_in_dwords * 4));

   brw_reg data = ubld16.vgrf(BRW_TYPE_UD, comps);

   for (unsigned q = 0; q < bld.dispatch_width() / 16; q++) {
      brw_builder wbld = bld.group(16, q);

      /* Dynamic dword offset scaled to bytes. */
      brw_reg addr = wbld.SHL(retype(horiz_offset(offset_src, 16 * q),
                                     BRW_TYPE_UD),
                              brw_imm_ud(2));

      brw_reg srcs[URB_LOGICAL_NUM_SRCS];
      srcs[URB_LOGICAL_SRC_HANDLE] = wbld.ADD(addr, urb_handle);

      brw_inst *inst = wbld.URB_READ(data, srcs, ARRAY_SIZE(srcs));
      inst->size_written = 2 * comps * REG_SIZE;

      for (unsigned c = 0; c < comps; c++) {
         brw_reg dest_comp = horiz_offset(offset(dest, bld, c), 16 * q);
         brw_reg data_comp = offset(data, wbld, c);
         wbld.MOV(retype(dest_comp, BRW_TYPE_UD), data_comp);
      }
   }
}
|
||||
|
||||
/* Dispatch a task/mesh output store to the right URB write helper.
 * Constant offsets go to the direct paths; variable offsets go to the
 * indirect paths, preferring the single-write "mod" variant on pre-Xe2
 * when nir_mod_analysis can prove (offset + base) % 4 at compile time.
 */
static void
emit_task_mesh_store(nir_to_brw_state &ntb,
                     const brw_builder &bld, nir_intrinsic_instr *instr,
                     const brw_reg &urb_handle)
{
   brw_reg src = get_nir_src(ntb, instr->src[0], -1);
   nir_src *offset_nir_src = nir_get_io_offset_src(instr);

   if (nir_src_is_const(*offset_nir_src)) {
      if (bld.shader->devinfo->ver >= 20)
         emit_urb_direct_writes_xe2(bld, instr, src, urb_handle);
      else
         emit_urb_direct_writes(bld, instr, src, urb_handle);
   } else {
      if (bld.shader->devinfo->ver >= 20) {
         emit_urb_indirect_writes_xe2(bld, instr, src,
                                      get_nir_src(ntb, *offset_nir_src, 0),
                                      urb_handle);
         return;
      }
      bool use_mod = false;
      unsigned mod;

      /* Try to calculate the value of (offset + base) % 4. If we can do
       * this, then we can do indirect writes using only 1 URB write.
       */
      use_mod = nir_mod_analysis(nir_get_scalar(offset_nir_src->ssa, 0), nir_type_uint, 4, &mod);
      if (use_mod) {
         /* Fold the constant base and component into the residue. */
         mod += nir_intrinsic_base(instr) + component_from_intrinsic(instr);
         mod %= 4;
      }

      if (use_mod) {
         emit_urb_indirect_writes_mod(bld, instr, src,
                                      get_nir_src(ntb, *offset_nir_src, 0),
                                      urb_handle, mod);
      } else {
         emit_urb_indirect_writes(bld, instr, src,
                                  get_nir_src(ntb, *offset_nir_src, 0),
                                  urb_handle);
      }
   }
}
|
||||
|
||||
static void
|
||||
emit_task_mesh_load(nir_to_brw_state &ntb,
|
||||
const brw_builder &bld, nir_intrinsic_instr *instr,
|
||||
const brw_reg &urb_handle)
|
||||
{
|
||||
brw_reg dest = get_nir_def(ntb, instr->def);
|
||||
nir_src *offset_nir_src = nir_get_io_offset_src(instr);
|
||||
|
||||
/* TODO(mesh): for per_vertex and per_primitive, if we could keep around
|
||||
* the non-array-index offset, we could use to decide if we can perform
|
||||
* a single large aligned read instead one per component.
|
||||
*/
|
||||
|
||||
if (nir_src_is_const(*offset_nir_src)) {
|
||||
if (bld.shader->devinfo->ver >= 20)
|
||||
emit_urb_direct_reads_xe2(bld, instr, dest, urb_handle);
|
||||
else
|
||||
emit_urb_direct_reads(bld, instr, dest, urb_handle);
|
||||
} else {
|
||||
if (bld.shader->devinfo->ver >= 20)
|
||||
emit_urb_indirect_reads_xe2(bld, instr, dest,
|
||||
get_nir_src(ntb, *offset_nir_src, 0),
|
||||
urb_handle);
|
||||
else
|
||||
emit_urb_indirect_reads(bld, instr, dest,
|
||||
get_nir_src(ntb, *offset_nir_src, 0),
|
||||
urb_handle);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
brw_from_nir_emit_task_mesh_intrinsic(nir_to_brw_state &ntb, const brw_builder &bld,
|
||||
nir_intrinsic_instr *instr)
|
||||
brw_from_nir_emit_task_mesh_intrinsic(nir_to_brw_state &ntb,
|
||||
nir_intrinsic_instr *instr)
|
||||
{
|
||||
brw_builder &bld = ntb.bld;
|
||||
brw_shader &s = ntb.s;
|
||||
|
||||
assert(s.stage == MESA_SHADER_MESH || s.stage == MESA_SHADER_TASK);
|
||||
@@ -5466,6 +4904,11 @@ brw_from_nir_emit_task_mesh_intrinsic(nir_to_brw_state &ntb, const brw_builder &
|
||||
dest = get_nir_def(ntb, instr->def);
|
||||
|
||||
switch (instr->intrinsic) {
|
||||
case nir_intrinsic_load_urb_input_handle_intel:
|
||||
assert(s.stage == MESA_SHADER_MESH);
|
||||
bld.MOV(retype(dest, BRW_TYPE_UD), payload.task_urb_input);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_urb_output_handle_intel:
|
||||
bld.MOV(retype(dest, BRW_TYPE_UD), payload.urb_output);
|
||||
break;
|
||||
@@ -5502,73 +4945,6 @@ brw_from_nir_emit_task_mesh_intrinsic(nir_to_brw_state &ntb, const brw_builder &
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
brw_from_nir_emit_task_intrinsic(nir_to_brw_state &ntb,
|
||||
nir_intrinsic_instr *instr)
|
||||
{
|
||||
const brw_builder &bld = ntb.bld;
|
||||
brw_shader &s = ntb.s;
|
||||
|
||||
assert(s.stage == MESA_SHADER_TASK);
|
||||
const brw_task_mesh_thread_payload &payload = s.task_mesh_payload();
|
||||
|
||||
switch (instr->intrinsic) {
|
||||
case nir_intrinsic_store_output:
|
||||
case nir_intrinsic_store_task_payload:
|
||||
emit_task_mesh_store(ntb, bld, instr, payload.urb_output);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_output:
|
||||
case nir_intrinsic_load_task_payload:
|
||||
emit_task_mesh_load(ntb, bld, instr, payload.urb_output);
|
||||
break;
|
||||
|
||||
default:
|
||||
brw_from_nir_emit_task_mesh_intrinsic(ntb, bld, instr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Lower a mesh-shader intrinsic.  Output stores/loads use the output URB
 * handle, task-payload loads use the task input handle, the input-handle
 * query copies the payload register, and anything else falls through to
 * the shared task/mesh handler.
 */
static void
brw_from_nir_emit_mesh_intrinsic(nir_to_brw_state &ntb,
                                 nir_intrinsic_instr *instr)
{
   const brw_builder &bld = ntb.bld;
   brw_shader &s = ntb.s;

   assert(s.stage == MESA_SHADER_MESH);
   const brw_task_mesh_thread_payload &payload = s.task_mesh_payload();

   /* Only fetch a destination when the intrinsic actually has one. */
   brw_reg dest;
   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
      dest = get_nir_def(ntb, instr->def);

   switch (instr->intrinsic) {
   case nir_intrinsic_load_urb_input_handle_intel:
      bld.MOV(retype(dest, BRW_TYPE_UD), payload.task_urb_input);
      break;
   case nir_intrinsic_store_per_primitive_output:
   case nir_intrinsic_store_per_vertex_output:
   case nir_intrinsic_store_output:
      emit_task_mesh_store(ntb, bld, instr, payload.urb_output);
      break;

   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_load_per_primitive_output:
   case nir_intrinsic_load_output:
      emit_task_mesh_load(ntb, bld, instr, payload.urb_output);
      break;

   case nir_intrinsic_load_task_payload:
      /* Mesh reads the task payload through the task URB input handle. */
      emit_task_mesh_load(ntb, bld, instr, payload.task_urb_input);
      break;

   default:
      brw_from_nir_emit_task_mesh_intrinsic(ntb, bld, instr);
      break;
   }
}
|
||||
|
||||
static void
|
||||
brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb,
|
||||
const brw_builder &bld, nir_intrinsic_instr *instr)
|
||||
@@ -7562,10 +6938,9 @@ brw_from_nir_emit_instr(nir_to_brw_state &ntb, nir_instr *instr)
|
||||
brw_from_nir_emit_bs_intrinsic(ntb, nir_instr_as_intrinsic(instr));
|
||||
break;
|
||||
case MESA_SHADER_TASK:
|
||||
brw_from_nir_emit_task_intrinsic(ntb, nir_instr_as_intrinsic(instr));
|
||||
break;
|
||||
case MESA_SHADER_MESH:
|
||||
brw_from_nir_emit_mesh_intrinsic(ntb, nir_instr_as_intrinsic(instr));
|
||||
brw_from_nir_emit_task_mesh_intrinsic(ntb,
|
||||
nir_instr_as_intrinsic(instr));
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE("unsupported shader stage");
|
||||
|
||||
Reference in New Issue
Block a user