intel/brw/xehp+: Replace lsc_msg_desc_dest_len()/lsc_msg_desc_src0_len() with helpers to do the computation.

We cannot rely on the immediate message descriptor having accurate
values for mlen and rlen at the IR level, since those fields are
updated at codegen time from 'inst->mlen' and 'inst->size_written',
and these could be inconsistent with the values in the message
descriptor if, e.g., the split-sends optimization had an effect.
Instead, define helpers that compute the lengths without relying on
the message descriptor, and use the pre-existing
brw_message_desc_mlen()/brw_message_desc_rlen() helpers (fully
equivalent to the lsc helpers deleted here) during disassembly.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28484>
This commit is contained in:
Francisco Jerez
2022-09-28 16:17:02 -07:00
committed by Marge Bot
parent 5f9ab41457
commit fa96274a87
5 changed files with 77 additions and 41 deletions
+4 -4
View File
@@ -2270,8 +2270,8 @@ brw_disassemble_inst(FILE *file, const struct brw_isa_info *isa,
break;
}
format(file, " dst_len = %u,", lsc_msg_desc_dest_len(devinfo, imm_desc));
format(file, " src0_len = %u,", lsc_msg_desc_src0_len(devinfo, imm_desc));
format(file, " dst_len = %u,", brw_message_desc_rlen(devinfo, imm_desc));
format(file, " src0_len = %u,", brw_message_desc_mlen(devinfo, imm_desc));
format(file, " src1_len = %d", brw_message_ex_desc_ex_mlen(devinfo, imm_ex_desc));
err |= control(file, "address_type", lsc_addr_surface_type,
lsc_msg_desc_addr_type(devinfo, imm_desc), &space);
@@ -2378,8 +2378,8 @@ brw_disassemble_inst(FILE *file, const struct brw_isa_info *isa,
break;
}
}
format(file, " dst_len = %u,", lsc_msg_desc_dest_len(devinfo, imm_desc));
format(file, " src0_len = %u,", lsc_msg_desc_src0_len(devinfo, imm_desc));
format(file, " dst_len = %u,", brw_message_desc_rlen(devinfo, imm_desc));
format(file, " src0_len = %u,", brw_message_desc_mlen(devinfo, imm_desc));
if (!brw_inst_send_sel_reg32_ex_desc(devinfo, inst))
format(file, " src1_len = %d",
+8 -8
View File
@@ -1258,19 +1258,19 @@ lsc_msg_desc_cache_ctrl(UNUSED const struct intel_device_info *devinfo,
}
static inline unsigned
lsc_msg_desc_dest_len(const struct intel_device_info *devinfo,
uint32_t desc)
lsc_msg_dest_len(const struct intel_device_info *devinfo,
enum lsc_data_size data_sz, unsigned n)
{
assert(devinfo->has_lsc);
return GET_BITS(desc, 24, 20) * reg_unit(devinfo);
return DIV_ROUND_UP(lsc_data_size_bytes(data_sz) * n,
reg_unit(devinfo) * REG_SIZE) * reg_unit(devinfo);
}
static inline unsigned
lsc_msg_desc_src0_len(const struct intel_device_info *devinfo,
uint32_t desc)
lsc_msg_addr_len(const struct intel_device_info *devinfo,
enum lsc_addr_size addr_sz, unsigned n)
{
assert(devinfo->has_lsc);
return GET_BITS(desc, 28, 25) * reg_unit(devinfo);
return DIV_ROUND_UP(lsc_addr_size_bytes(addr_sz) * n,
reg_unit(devinfo) * REG_SIZE) * reg_unit(devinfo);
}
static inline enum lsc_addr_surface_type
+2 -2
View File
@@ -1360,9 +1360,9 @@ fs_visitor::assign_curb_setup()
LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS),
true /* has_dest */);
send->header_size = 0;
send->mlen = lsc_msg_desc_src0_len(devinfo, send->desc);
send->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, 1);
send->size_written =
lsc_msg_desc_dest_len(devinfo, send->desc) * REG_SIZE;
lsc_msg_dest_len(devinfo, LSC_DATA_SIZE_D32, num_regs * 8) * REG_SIZE;
send->send_is_volatile = true;
i += num_regs;
+5 -4
View File
@@ -684,11 +684,11 @@ fs_reg_alloc::emit_unspill(const fs_builder &bld,
LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS),
true /* has_dest */);
unspill_inst->header_size = 0;
unspill_inst->mlen =
lsc_msg_desc_src0_len(devinfo, unspill_inst->desc);
unspill_inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32,
unspill_inst->exec_size);
unspill_inst->ex_mlen = 0;
unspill_inst->size_written =
lsc_msg_desc_dest_len(devinfo, unspill_inst->desc) * REG_SIZE;
lsc_msg_dest_len(devinfo, LSC_DATA_SIZE_D32, bld.dispatch_width()) * REG_SIZE;
unspill_inst->send_has_side_effects = false;
unspill_inst->send_is_volatile = true;
unspill_inst->send_ex_desc_scratch = true;
@@ -766,7 +766,8 @@ fs_reg_alloc::emit_spill(const fs_builder &bld,
LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS),
false /* has_dest */);
spill_inst->header_size = 0;
spill_inst->mlen = lsc_msg_desc_src0_len(devinfo, spill_inst->desc);
spill_inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32,
bld.dispatch_width());
spill_inst->ex_mlen = reg_size;
spill_inst->size_written = 0;
spill_inst->send_has_side_effects = true;
+58 -23
View File
@@ -121,7 +121,7 @@ lower_urb_read_logical_send_xe2(const fs_builder &bld, fs_inst *inst)
/* Update the original instruction. */
inst->opcode = SHADER_OPCODE_SEND;
inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, inst->exec_size);
inst->ex_mlen = 0;
inst->header_size = 0;
inst->send_has_side_effects = true;
@@ -252,7 +252,7 @@ lower_urb_write_logical_send_xe2(const fs_builder &bld, fs_inst *inst)
/* Update the original instruction. */
inst->opcode = SHADER_OPCODE_SEND;
inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, inst->exec_size);
inst->ex_mlen = ex_mlen;
inst->header_size = 0;
inst->send_has_side_effects = true;
@@ -1665,6 +1665,9 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
const bool has_side_effects = inst->has_side_effects();
unsigned num_components = 0;
bool has_dest = false;
unsigned ex_mlen = 0;
fs_reg payload, payload2;
payload = bld.move_to_vgrf(addr, addr_sz);
@@ -1717,19 +1720,23 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
switch (inst->opcode) {
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
num_components = arg.ud;
has_dest = true;
inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD_CMASK, inst->exec_size,
surf_type, LSC_ADDR_SIZE_A32,
dims.ud /* num_coordinates */,
LSC_DATA_SIZE_D32, arg.ud /* num_channels */,
LSC_DATA_SIZE_D32, num_components,
false /* transpose */,
LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS),
true /* has_dest */);
break;
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
num_components = arg.ud;
has_dest = false;
inst->desc = lsc_msg_desc(devinfo, LSC_OP_STORE_CMASK, inst->exec_size,
surf_type, LSC_ADDR_SIZE_A32,
dims.ud /* num_coordinates */,
LSC_DATA_SIZE_D32, arg.ud /* num_channels */,
LSC_DATA_SIZE_D32, num_components,
false /* transpose */,
LSC_CACHE(devinfo, STORE, L1STATE_L3MOCS),
false /* has_dest */);
@@ -1742,32 +1749,38 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
*/
enum lsc_opcode opcode = (enum lsc_opcode) arg.ud;
num_components = 1;
has_dest = !inst->dst.is_null();
inst->desc = lsc_msg_desc(devinfo, opcode, inst->exec_size,
surf_type, LSC_ADDR_SIZE_A32,
dims.ud /* num_coordinates */,
lsc_bits_to_data_size(dst_sz * 8),
1 /* num_channels */,
num_components,
false /* transpose */,
LSC_CACHE(devinfo, STORE, L1UC_L3WB),
!inst->dst.is_null());
break;
}
case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
num_components = 1;
has_dest = true;
inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD, inst->exec_size,
surf_type, LSC_ADDR_SIZE_A32,
dims.ud /* num_coordinates */,
lsc_bits_to_data_size(arg.ud),
1 /* num_channels */,
num_components,
false /* transpose */,
LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS),
true /* has_dest */);
break;
case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
num_components = 1;
has_dest = false;
inst->desc = lsc_msg_desc(devinfo, LSC_OP_STORE, inst->exec_size,
surf_type, LSC_ADDR_SIZE_A32,
dims.ud /* num_coordinates */,
lsc_bits_to_data_size(arg.ud),
1 /* num_channels */,
num_components,
false /* transpose */,
LSC_CACHE(devinfo, STORE, L1STATE_L3MOCS),
false /* has_dest */);
@@ -1778,14 +1791,16 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
/* Update the original instruction. */
inst->opcode = SHADER_OPCODE_SEND;
inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, inst->exec_size * dims.ud);
inst->ex_mlen = ex_mlen;
inst->header_size = 0;
inst->send_has_side_effects = has_side_effects;
inst->send_is_volatile = !has_side_effects;
inst->send_ex_bso = surf_type == LSC_ADDR_SURFTYPE_BSS &&
compiler->extended_bindless_surface_offset;
inst->size_written = lsc_msg_desc_dest_len(devinfo, inst->desc) * REG_SIZE;
inst->size_written = !has_dest ? 0 :
lsc_msg_dest_len(devinfo, lsc_msg_desc_data_size(devinfo, inst->desc),
inst->exec_size * num_components) * REG_SIZE;
inst->resize_sources(4);
@@ -1865,8 +1880,9 @@ lower_lsc_block_logical_send(const fs_builder &bld, fs_inst *inst)
LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS),
!write /* has_dest */);
inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
inst->size_written = lsc_msg_desc_dest_len(devinfo, inst->desc) * REG_SIZE;
inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, 1);
inst->size_written = write ? 0 :
lsc_msg_dest_len(devinfo, LSC_DATA_SIZE_D32, arg.ud) * REG_SIZE;
inst->exec_size = 1;
inst->ex_mlen = write ? DIV_ROUND_UP(arg.ud, 8) : 0;
inst->header_size = 0;
@@ -2024,42 +2040,52 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst)
fs_reg payload2 = retype(bld.move_to_vgrf(src, src_comps),
BRW_REGISTER_TYPE_UD);
unsigned ex_mlen = src_comps * src_sz * inst->exec_size / REG_SIZE;
unsigned num_components = 0;
bool has_dest = false;
switch (inst->opcode) {
case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
num_components = arg;
has_dest = true;
inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD_CMASK, inst->exec_size,
LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A64,
1 /* num_coordinates */,
LSC_DATA_SIZE_D32, arg /* num_channels */,
LSC_DATA_SIZE_D32, num_components,
false /* transpose */,
LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS),
true /* has_dest */);
break;
case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
num_components = arg;
has_dest = false;
inst->desc = lsc_msg_desc(devinfo, LSC_OP_STORE_CMASK, inst->exec_size,
LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A64,
1 /* num_coordinates */,
LSC_DATA_SIZE_D32, arg /* num_channels */,
LSC_DATA_SIZE_D32, num_components,
false /* transpose */,
LSC_CACHE(devinfo, STORE, L1STATE_L3MOCS),
false /* has_dest */);
break;
case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
num_components = 1;
has_dest = true;
inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD, inst->exec_size,
LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A64,
1 /* num_coordinates */,
lsc_bits_to_data_size(arg),
1 /* num_channels */,
num_components,
false /* transpose */,
LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS),
true /* has_dest */);
break;
case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
num_components = 1;
has_dest = false;
inst->desc = lsc_msg_desc(devinfo, LSC_OP_STORE, inst->exec_size,
LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A64,
1 /* num_coordinates */,
lsc_bits_to_data_size(arg),
1 /* num_channels */,
num_components,
false /* transpose */,
LSC_CACHE(devinfo, STORE, L1STATE_L3MOCS),
false /* has_dest */);
@@ -2071,11 +2097,13 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst)
* cache.
*/
enum lsc_opcode opcode = (enum lsc_opcode) arg;
num_components = 1;
has_dest = !inst->dst.is_null();
inst->desc = lsc_msg_desc(devinfo, opcode, inst->exec_size,
LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A64,
1 /* num_coordinates */,
lsc_bits_to_data_size(dst_sz * 8),
1 /* num_channels */,
num_components,
false /* transpose */,
LSC_CACHE(devinfo, STORE, L1UC_L3WB),
!inst->dst.is_null());
@@ -2083,6 +2111,8 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst)
}
case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL:
case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
num_components = arg;
has_dest = true;
inst->exec_size = 1;
inst->desc = lsc_msg_desc(devinfo,
LSC_OP_LOAD,
@@ -2091,12 +2121,14 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst)
LSC_ADDR_SIZE_A64,
1 /* num_coordinates */,
LSC_DATA_SIZE_D32,
arg /* num_channels */,
num_components,
true /* transpose */,
LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS),
true /* has_dest */);
break;
case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
num_components = arg;
has_dest = false;
inst->exec_size = 1;
inst->desc = lsc_msg_desc(devinfo,
LSC_OP_STORE,
@@ -2105,7 +2137,7 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst)
LSC_ADDR_SIZE_A64,
1 /* num_coordinates */,
LSC_DATA_SIZE_D32,
arg /* num_channels */,
num_components,
true /* transpose */,
LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS),
false /* has_dest */);
@@ -2120,12 +2152,15 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst)
/* Update the original instruction. */
inst->opcode = SHADER_OPCODE_SEND;
inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A64, inst->exec_size);
inst->ex_mlen = ex_mlen;
inst->header_size = 0;
inst->send_has_side_effects = has_side_effects;
inst->send_is_volatile = !has_side_effects;
inst->size_written = lsc_msg_desc_dest_len(devinfo, inst->desc) * REG_SIZE;
inst->size_written = !has_dest ? 0 :
lsc_msg_dest_len(devinfo, lsc_msg_desc_data_size(devinfo, inst->desc),
inst->exec_size * num_components) * REG_SIZE;
/* Set up SFID and descriptors */
inst->sfid = GFX12_SFID_UGM;
@@ -2306,7 +2341,7 @@ lower_lsc_varying_pull_constant_logical_send(const fs_builder &bld,
false /* transpose */,
LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS),
true /* has_dest */);
inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, inst->exec_size);
setup_lsc_surface_descriptors(bld, inst, inst->desc,
surface.file != BAD_FILE ?
@@ -2321,7 +2356,7 @@ lower_lsc_varying_pull_constant_logical_send(const fs_builder &bld,
false /* transpose */,
LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS),
true /* has_dest */);
inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, inst->exec_size);
setup_lsc_surface_descriptors(bld, inst, inst->desc,
surface.file != BAD_FILE ?
@@ -2936,7 +2971,7 @@ brw_fs_lower_uniform_pull_constant_loads(fs_visitor &s)
/* Update the original instruction. */
inst->opcode = SHADER_OPCODE_SEND;
inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc);
inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, 1);
inst->send_ex_bso = surface_handle.file != BAD_FILE &&
s.compiler->extended_bindless_surface_offset;
inst->ex_mlen = 0;