From fa96274a87183d2c259754b11a1df4bd48668007 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 28 Sep 2022 16:17:02 -0700 Subject: [PATCH] intel/brw/xehp+: Replace lsc_msg_desc_dest_len()/lsc_msg_desc_src0_len() with helpers to do the computation. We cannot rely on the immediate message descriptor having accurate values for mlen and rlen at the IR level, since they are updated at codegen time via 'inst->mlen' and 'inst->size_written', which could end up with values inconsistent with the message descriptor if e.g. the split sends optimization had an effect. Instead, define helpers that do the computation without relying on the message descriptor, and use the pre-existing brw_message_desc_mlen()/brw_message_desc_rlen() helpers (fully equivalent to the lsc helpers deleted here) during disassembly. Reviewed-by: Ian Romanick Part-of: --- src/intel/compiler/brw_disasm.c | 8 +- src/intel/compiler/brw_eu.h | 16 ++-- src/intel/compiler/brw_fs.cpp | 4 +- src/intel/compiler/brw_fs_reg_allocate.cpp | 9 ++- .../compiler/brw_lower_logical_sends.cpp | 81 +++++++++++++------ 5 files changed, 77 insertions(+), 41 deletions(-) diff --git a/src/intel/compiler/brw_disasm.c b/src/intel/compiler/brw_disasm.c index b68b649db9a..a48c141fe70 100644 --- a/src/intel/compiler/brw_disasm.c +++ b/src/intel/compiler/brw_disasm.c @@ -2270,8 +2270,8 @@ brw_disassemble_inst(FILE *file, const struct brw_isa_info *isa, break; } - format(file, " dst_len = %u,", lsc_msg_desc_dest_len(devinfo, imm_desc)); - format(file, " src0_len = %u,", lsc_msg_desc_src0_len(devinfo, imm_desc)); + format(file, " dst_len = %u,", brw_message_desc_rlen(devinfo, imm_desc)); + format(file, " src0_len = %u,", brw_message_desc_mlen(devinfo, imm_desc)); format(file, " src1_len = %d", brw_message_ex_desc_ex_mlen(devinfo, imm_ex_desc)); err |= control(file, "address_type", lsc_addr_surface_type, lsc_msg_desc_addr_type(devinfo, imm_desc), &space); @@ -2378,8 +2378,8 @@ brw_disassemble_inst(FILE *file, const struct brw_isa_info *isa, break; } } - format(file, " dst_len = %u,", lsc_msg_desc_dest_len(devinfo, imm_desc)); - format(file, " src0_len = %u,", lsc_msg_desc_src0_len(devinfo, imm_desc)); + format(file, " dst_len = %u,", brw_message_desc_rlen(devinfo, imm_desc)); + format(file, " src0_len = %u,", brw_message_desc_mlen(devinfo, imm_desc)); if (!brw_inst_send_sel_reg32_ex_desc(devinfo, inst)) format(file, " src1_len = %d", diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index ae668808838..1a03f8067b4 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -1258,19 +1258,19 @@ lsc_msg_desc_cache_ctrl(UNUSED const struct intel_device_info *devinfo, } static inline unsigned -lsc_msg_desc_dest_len(const struct intel_device_info *devinfo, - uint32_t desc) +lsc_msg_dest_len(const struct intel_device_info *devinfo, + enum lsc_data_size data_sz, unsigned n) { - assert(devinfo->has_lsc); - return GET_BITS(desc, 24, 20) * reg_unit(devinfo); + return DIV_ROUND_UP(lsc_data_size_bytes(data_sz) * n, + reg_unit(devinfo) * REG_SIZE) * reg_unit(devinfo); } static inline unsigned -lsc_msg_desc_src0_len(const struct intel_device_info *devinfo, - uint32_t desc) +lsc_msg_addr_len(const struct intel_device_info *devinfo, + enum lsc_addr_size addr_sz, unsigned n) { - assert(devinfo->has_lsc); - return GET_BITS(desc, 28, 25) * reg_unit(devinfo); + return DIV_ROUND_UP(lsc_addr_size_bytes(addr_sz) * n, + reg_unit(devinfo) * REG_SIZE) * reg_unit(devinfo); } static inline enum lsc_addr_surface_type diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 00265202ffb..9d78b0d133a 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -1360,9 +1360,9 @@ fs_visitor::assign_curb_setup() LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS), true /* has_dest */); send->header_size = 0; - send->mlen = lsc_msg_desc_src0_len(devinfo, send->desc); + send->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, 1); send->size_written = - lsc_msg_desc_dest_len(devinfo, send->desc) * REG_SIZE; + lsc_msg_dest_len(devinfo, LSC_DATA_SIZE_D32, num_regs * 8) * REG_SIZE; send->send_is_volatile = true; i += num_regs; diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp b/src/intel/compiler/brw_fs_reg_allocate.cpp index cee40fab354..d374354983d 100644 --- a/src/intel/compiler/brw_fs_reg_allocate.cpp +++ b/src/intel/compiler/brw_fs_reg_allocate.cpp @@ -684,11 +684,11 @@ fs_reg_alloc::emit_unspill(const fs_builder &bld, LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS), true /* has_dest */); unspill_inst->header_size = 0; - unspill_inst->mlen = - lsc_msg_desc_src0_len(devinfo, unspill_inst->desc); + unspill_inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, + unspill_inst->exec_size); unspill_inst->ex_mlen = 0; unspill_inst->size_written = - lsc_msg_desc_dest_len(devinfo, unspill_inst->desc) * REG_SIZE; + lsc_msg_dest_len(devinfo, LSC_DATA_SIZE_D32, bld.dispatch_width()) * REG_SIZE; unspill_inst->send_has_side_effects = false; unspill_inst->send_is_volatile = true; unspill_inst->send_ex_desc_scratch = true; @@ -766,7 +766,8 @@ fs_reg_alloc::emit_spill(const fs_builder &bld, LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS), false /* has_dest */); spill_inst->header_size = 0; - spill_inst->mlen = lsc_msg_desc_src0_len(devinfo, spill_inst->desc); + spill_inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, + bld.dispatch_width()); spill_inst->ex_mlen = reg_size; spill_inst->size_written = 0; spill_inst->send_has_side_effects = true; diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index 02338418366..147262ceb98 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -121,7 +121,7 @@ lower_urb_read_logical_send_xe2(const fs_builder &bld, fs_inst *inst) /* Update the original instruction. */ inst->opcode = SHADER_OPCODE_SEND; - inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc); + inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, inst->exec_size); inst->ex_mlen = 0; inst->header_size = 0; inst->send_has_side_effects = true; @@ -252,7 +252,7 @@ lower_urb_write_logical_send_xe2(const fs_builder &bld, fs_inst *inst) /* Update the original instruction. */ inst->opcode = SHADER_OPCODE_SEND; - inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc); + inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, inst->exec_size); inst->ex_mlen = ex_mlen; inst->header_size = 0; inst->send_has_side_effects = true; @@ -1665,6 +1665,9 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst) const bool has_side_effects = inst->has_side_effects(); + unsigned num_components = 0; + bool has_dest = false; + unsigned ex_mlen = 0; fs_reg payload, payload2; payload = bld.move_to_vgrf(addr, addr_sz); @@ -1717,19 +1720,23 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst) switch (inst->opcode) { case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: + num_components = arg.ud; + has_dest = true; inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD_CMASK, inst->exec_size, surf_type, LSC_ADDR_SIZE_A32, dims.ud /* num_coordinates */, - LSC_DATA_SIZE_D32, arg.ud /* num_channels */, + LSC_DATA_SIZE_D32, num_components, false /* transpose */, LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS), true /* has_dest */); break; case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: + num_components = arg.ud; + has_dest = false; inst->desc = lsc_msg_desc(devinfo, LSC_OP_STORE_CMASK, inst->exec_size, surf_type, LSC_ADDR_SIZE_A32, dims.ud /* num_coordinates */, - LSC_DATA_SIZE_D32, arg.ud /* num_channels */, + LSC_DATA_SIZE_D32, num_components, false /* transpose */, LSC_CACHE(devinfo, STORE, L1STATE_L3MOCS), false /* has_dest */); @@ -1742,32 +1749,38 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst) */ enum lsc_opcode opcode = (enum lsc_opcode) arg.ud; + num_components = 1; + has_dest = !inst->dst.is_null(); inst->desc = lsc_msg_desc(devinfo, opcode, inst->exec_size, surf_type, LSC_ADDR_SIZE_A32, dims.ud /* num_coordinates */, lsc_bits_to_data_size(dst_sz * 8), - 1 /* num_channels */, + num_components, false /* transpose */, LSC_CACHE(devinfo, STORE, L1UC_L3WB), !inst->dst.is_null()); break; } case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: + num_components = 1; + has_dest = true; inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD, inst->exec_size, surf_type, LSC_ADDR_SIZE_A32, dims.ud /* num_coordinates */, lsc_bits_to_data_size(arg.ud), - 1 /* num_channels */, + num_components, false /* transpose */, LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS), true /* has_dest */); break; case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: + num_components = 1; + has_dest = false; inst->desc = lsc_msg_desc(devinfo, LSC_OP_STORE, inst->exec_size, surf_type, LSC_ADDR_SIZE_A32, dims.ud /* num_coordinates */, lsc_bits_to_data_size(arg.ud), - 1 /* num_channels */, + num_components, false /* transpose */, LSC_CACHE(devinfo, STORE, L1STATE_L3MOCS), false /* has_dest */); @@ -1778,14 +1791,16 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst) /* Update the original instruction. */ inst->opcode = SHADER_OPCODE_SEND; - inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc); + inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, inst->exec_size * dims.ud); inst->ex_mlen = ex_mlen; inst->header_size = 0; inst->send_has_side_effects = has_side_effects; inst->send_is_volatile = !has_side_effects; inst->send_ex_bso = surf_type == LSC_ADDR_SURFTYPE_BSS && compiler->extended_bindless_surface_offset; - inst->size_written = lsc_msg_desc_dest_len(devinfo, inst->desc) * REG_SIZE; + inst->size_written = !has_dest ? 0 : + lsc_msg_dest_len(devinfo, lsc_msg_desc_data_size(devinfo, inst->desc), + inst->exec_size * num_components) * REG_SIZE; inst->resize_sources(4); @@ -1865,8 +1880,9 @@ lower_lsc_block_logical_send(const fs_builder &bld, fs_inst *inst) LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS), !write /* has_dest */); - inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc); - inst->size_written = lsc_msg_desc_dest_len(devinfo, inst->desc) * REG_SIZE; + inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, 1); + inst->size_written = write ? 0 : + lsc_msg_dest_len(devinfo, LSC_DATA_SIZE_D32, arg.ud) * REG_SIZE; inst->exec_size = 1; inst->ex_mlen = write ? DIV_ROUND_UP(arg.ud, 8) : 0; inst->header_size = 0; @@ -2024,42 +2040,52 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst) fs_reg payload2 = retype(bld.move_to_vgrf(src, src_comps), BRW_REGISTER_TYPE_UD); unsigned ex_mlen = src_comps * src_sz * inst->exec_size / REG_SIZE; + unsigned num_components = 0; + bool has_dest = false; switch (inst->opcode) { case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL: + num_components = arg; + has_dest = true; inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD_CMASK, inst->exec_size, LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A64, 1 /* num_coordinates */, - LSC_DATA_SIZE_D32, arg /* num_channels */, + LSC_DATA_SIZE_D32, num_components, false /* transpose */, LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS), true /* has_dest */); break; case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL: + num_components = arg; + has_dest = false; inst->desc = lsc_msg_desc(devinfo, LSC_OP_STORE_CMASK, inst->exec_size, LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A64, 1 /* num_coordinates */, - LSC_DATA_SIZE_D32, arg /* num_channels */, + LSC_DATA_SIZE_D32, num_components, false /* transpose */, LSC_CACHE(devinfo, STORE, L1STATE_L3MOCS), false /* has_dest */); break; case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL: + num_components = 1; + has_dest = true; inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD, inst->exec_size, LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A64, 1 /* num_coordinates */, lsc_bits_to_data_size(arg), - 1 /* num_channels */, + num_components, false /* transpose */, LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS), true /* has_dest */); break; case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: + num_components = 1; + has_dest = false; inst->desc = lsc_msg_desc(devinfo, LSC_OP_STORE, inst->exec_size, LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A64, 1 /* num_coordinates */, lsc_bits_to_data_size(arg), - 1 /* num_channels */, + num_components, false /* transpose */, LSC_CACHE(devinfo, STORE, L1STATE_L3MOCS), false /* has_dest */); @@ -2071,11 +2097,13 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst) * cache. */ enum lsc_opcode opcode = (enum lsc_opcode) arg; + num_components = 1; + has_dest = !inst->dst.is_null(); inst->desc = lsc_msg_desc(devinfo, opcode, inst->exec_size, LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A64, 1 /* num_coordinates */, lsc_bits_to_data_size(dst_sz * 8), - 1 /* num_channels */, + num_components, false /* transpose */, LSC_CACHE(devinfo, STORE, L1UC_L3WB), !inst->dst.is_null()); @@ -2083,6 +2111,8 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst) } case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL: case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL: + num_components = arg; + has_dest = true; inst->exec_size = 1; inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD, @@ -2091,12 +2121,14 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst) LSC_ADDR_SIZE_A64, 1 /* num_coordinates */, LSC_DATA_SIZE_D32, - arg /* num_channels */, + num_components, true /* transpose */, LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS), true /* has_dest */); break; case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL: + num_components = arg; + has_dest = false; inst->exec_size = 1; inst->desc = lsc_msg_desc(devinfo, LSC_OP_STORE, @@ -2105,7 +2137,7 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst) LSC_ADDR_SIZE_A64, 1 /* num_coordinates */, LSC_DATA_SIZE_D32, - arg /* num_channels */, + num_components, true /* transpose */, LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS), false /* has_dest */); @@ -2120,12 +2152,15 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst) /* Update the original instruction. */ inst->opcode = SHADER_OPCODE_SEND; - inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc); + inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A64, inst->exec_size); inst->ex_mlen = ex_mlen; inst->header_size = 0; inst->send_has_side_effects = has_side_effects; inst->send_is_volatile = !has_side_effects; - inst->size_written = lsc_msg_desc_dest_len(devinfo, inst->desc) * REG_SIZE; + + inst->size_written = !has_dest ? 0 : + lsc_msg_dest_len(devinfo, lsc_msg_desc_data_size(devinfo, inst->desc), + inst->exec_size * num_components) * REG_SIZE; /* Set up SFID and descriptors */ inst->sfid = GFX12_SFID_UGM; @@ -2306,7 +2341,7 @@ lower_lsc_varying_pull_constant_logical_send(const fs_builder &bld, false /* transpose */, LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS), true /* has_dest */); - inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc); + inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, inst->exec_size); setup_lsc_surface_descriptors(bld, inst, inst->desc, surface.file != BAD_FILE ? @@ -2321,7 +2356,7 @@ lower_lsc_varying_pull_constant_logical_send(const fs_builder &bld, false /* transpose */, LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS), true /* has_dest */); - inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc); + inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, inst->exec_size); setup_lsc_surface_descriptors(bld, inst, inst->desc, surface.file != BAD_FILE ? @@ -2936,7 +2971,7 @@ brw_fs_lower_uniform_pull_constant_loads(fs_visitor &s) /* Update the original instruction. */ inst->opcode = SHADER_OPCODE_SEND; - inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc); + inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, 1); inst->send_ex_bso = surface_handle.file != BAD_FILE && s.compiler->extended_bindless_surface_offset; inst->ex_mlen = 0;