diff --git a/src/intel/compiler/brw_disasm.c b/src/intel/compiler/brw_disasm.c index b68b649db9a..a48c141fe70 100644 --- a/src/intel/compiler/brw_disasm.c +++ b/src/intel/compiler/brw_disasm.c @@ -2270,8 +2270,8 @@ brw_disassemble_inst(FILE *file, const struct brw_isa_info *isa, break; } - format(file, " dst_len = %u,", lsc_msg_desc_dest_len(devinfo, imm_desc)); - format(file, " src0_len = %u,", lsc_msg_desc_src0_len(devinfo, imm_desc)); + format(file, " dst_len = %u,", brw_message_desc_rlen(devinfo, imm_desc)); + format(file, " src0_len = %u,", brw_message_desc_mlen(devinfo, imm_desc)); format(file, " src1_len = %d", brw_message_ex_desc_ex_mlen(devinfo, imm_ex_desc)); err |= control(file, "address_type", lsc_addr_surface_type, lsc_msg_desc_addr_type(devinfo, imm_desc), &space); @@ -2378,8 +2378,8 @@ brw_disassemble_inst(FILE *file, const struct brw_isa_info *isa, break; } } - format(file, " dst_len = %u,", lsc_msg_desc_dest_len(devinfo, imm_desc)); - format(file, " src0_len = %u,", lsc_msg_desc_src0_len(devinfo, imm_desc)); + format(file, " dst_len = %u,", brw_message_desc_rlen(devinfo, imm_desc)); + format(file, " src0_len = %u,", brw_message_desc_mlen(devinfo, imm_desc)); if (!brw_inst_send_sel_reg32_ex_desc(devinfo, inst)) format(file, " src1_len = %d", diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index ae668808838..1a03f8067b4 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -1258,19 +1258,19 @@ lsc_msg_desc_cache_ctrl(UNUSED const struct intel_device_info *devinfo, } static inline unsigned -lsc_msg_desc_dest_len(const struct intel_device_info *devinfo, - uint32_t desc) +lsc_msg_dest_len(const struct intel_device_info *devinfo, + enum lsc_data_size data_sz, unsigned n) { - assert(devinfo->has_lsc); - return GET_BITS(desc, 24, 20) * reg_unit(devinfo); + return DIV_ROUND_UP(lsc_data_size_bytes(data_sz) * n, + reg_unit(devinfo) * REG_SIZE) * reg_unit(devinfo); } static inline unsigned -lsc_msg_desc_src0_len(const struct intel_device_info *devinfo, - uint32_t desc) +lsc_msg_addr_len(const struct intel_device_info *devinfo, + enum lsc_addr_size addr_sz, unsigned n) { - assert(devinfo->has_lsc); - return GET_BITS(desc, 28, 25) * reg_unit(devinfo); + return DIV_ROUND_UP(lsc_addr_size_bytes(addr_sz) * n, + reg_unit(devinfo) * REG_SIZE) * reg_unit(devinfo); } static inline enum lsc_addr_surface_type diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 00265202ffb..9d78b0d133a 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -1360,9 +1360,9 @@ fs_visitor::assign_curb_setup() LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS), true /* has_dest */); send->header_size = 0; - send->mlen = lsc_msg_desc_src0_len(devinfo, send->desc); + send->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, 1); send->size_written = - lsc_msg_desc_dest_len(devinfo, send->desc) * REG_SIZE; + lsc_msg_dest_len(devinfo, LSC_DATA_SIZE_D32, num_regs * 8) * REG_SIZE; send->send_is_volatile = true; i += num_regs; diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp b/src/intel/compiler/brw_fs_reg_allocate.cpp index cee40fab354..d374354983d 100644 --- a/src/intel/compiler/brw_fs_reg_allocate.cpp +++ b/src/intel/compiler/brw_fs_reg_allocate.cpp @@ -684,11 +684,11 @@ fs_reg_alloc::emit_unspill(const fs_builder &bld, LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS), true /* has_dest */); unspill_inst->header_size = 0; - unspill_inst->mlen = - lsc_msg_desc_src0_len(devinfo, unspill_inst->desc); + unspill_inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, + unspill_inst->exec_size); unspill_inst->ex_mlen = 0; unspill_inst->size_written = - lsc_msg_desc_dest_len(devinfo, unspill_inst->desc) * REG_SIZE; + lsc_msg_dest_len(devinfo, LSC_DATA_SIZE_D32, bld.dispatch_width()) * REG_SIZE; unspill_inst->send_has_side_effects = false; unspill_inst->send_is_volatile = true; unspill_inst->send_ex_desc_scratch = true; @@ -766,7 +766,8 @@ fs_reg_alloc::emit_spill(const fs_builder &bld, LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS), false /* has_dest */); spill_inst->header_size = 0; - spill_inst->mlen = lsc_msg_desc_src0_len(devinfo, spill_inst->desc); + spill_inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, + bld.dispatch_width()); spill_inst->ex_mlen = reg_size; spill_inst->size_written = 0; spill_inst->send_has_side_effects = true; diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index 02338418366..147262ceb98 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -121,7 +121,7 @@ lower_urb_read_logical_send_xe2(const fs_builder &bld, fs_inst *inst) /* Update the original instruction. */ inst->opcode = SHADER_OPCODE_SEND; - inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc); + inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, inst->exec_size); inst->ex_mlen = 0; inst->header_size = 0; inst->send_has_side_effects = true; @@ -252,7 +252,7 @@ lower_urb_write_logical_send_xe2(const fs_builder &bld, fs_inst *inst) /* Update the original instruction. */ inst->opcode = SHADER_OPCODE_SEND; - inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc); + inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, inst->exec_size); inst->ex_mlen = ex_mlen; inst->header_size = 0; inst->send_has_side_effects = true; @@ -1665,6 +1665,9 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst) const bool has_side_effects = inst->has_side_effects(); + unsigned num_components = 0; + bool has_dest = false; + unsigned ex_mlen = 0; fs_reg payload, payload2; payload = bld.move_to_vgrf(addr, addr_sz); @@ -1717,19 +1720,23 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst) switch (inst->opcode) { case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: + num_components = arg.ud; + has_dest = true; inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD_CMASK, inst->exec_size, surf_type, LSC_ADDR_SIZE_A32, dims.ud /* num_coordinates */, - LSC_DATA_SIZE_D32, arg.ud /* num_channels */, + LSC_DATA_SIZE_D32, num_components, false /* transpose */, LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS), true /* has_dest */); break; case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: + num_components = arg.ud; + has_dest = false; inst->desc = lsc_msg_desc(devinfo, LSC_OP_STORE_CMASK, inst->exec_size, surf_type, LSC_ADDR_SIZE_A32, dims.ud /* num_coordinates */, - LSC_DATA_SIZE_D32, arg.ud /* num_channels */, + LSC_DATA_SIZE_D32, num_components, false /* transpose */, LSC_CACHE(devinfo, STORE, L1STATE_L3MOCS), false /* has_dest */); @@ -1742,32 +1749,38 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst) */ enum lsc_opcode opcode = (enum lsc_opcode) arg.ud; + num_components = 1; + has_dest = !inst->dst.is_null(); inst->desc = lsc_msg_desc(devinfo, opcode, inst->exec_size, surf_type, LSC_ADDR_SIZE_A32, dims.ud /* num_coordinates */, lsc_bits_to_data_size(dst_sz * 8), - 1 /* num_channels */, + num_components, false /* transpose */, LSC_CACHE(devinfo, STORE, L1UC_L3WB), !inst->dst.is_null()); break; } case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: + num_components = 1; + has_dest = true; inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD, inst->exec_size, surf_type, LSC_ADDR_SIZE_A32, dims.ud /* num_coordinates */, lsc_bits_to_data_size(arg.ud), - 1 /* num_channels */, + num_components, false /* transpose */, LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS), true /* has_dest */); break; case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: + num_components = 1; + has_dest = false; inst->desc = lsc_msg_desc(devinfo, LSC_OP_STORE, inst->exec_size, surf_type, LSC_ADDR_SIZE_A32, dims.ud /* num_coordinates */, lsc_bits_to_data_size(arg.ud), - 1 /* num_channels */, + num_components, false /* transpose */, LSC_CACHE(devinfo, STORE, L1STATE_L3MOCS), false /* has_dest */); @@ -1778,14 +1791,16 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst) /* Update the original instruction. */ inst->opcode = SHADER_OPCODE_SEND; - inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc); + inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, inst->exec_size * dims.ud); inst->ex_mlen = ex_mlen; inst->header_size = 0; inst->send_has_side_effects = has_side_effects; inst->send_is_volatile = !has_side_effects; inst->send_ex_bso = surf_type == LSC_ADDR_SURFTYPE_BSS && compiler->extended_bindless_surface_offset; - inst->size_written = lsc_msg_desc_dest_len(devinfo, inst->desc) * REG_SIZE; + inst->size_written = !has_dest ? 0 : + lsc_msg_dest_len(devinfo, lsc_msg_desc_data_size(devinfo, inst->desc), + inst->exec_size * num_components) * REG_SIZE; inst->resize_sources(4); @@ -1865,8 +1880,9 @@ lower_lsc_block_logical_send(const fs_builder &bld, fs_inst *inst) LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS), !write /* has_dest */); - inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc); - inst->size_written = lsc_msg_desc_dest_len(devinfo, inst->desc) * REG_SIZE; + inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, 1); + inst->size_written = write ? 0 : + lsc_msg_dest_len(devinfo, LSC_DATA_SIZE_D32, arg.ud) * REG_SIZE; inst->exec_size = 1; inst->ex_mlen = write ? DIV_ROUND_UP(arg.ud, 8) : 0; inst->header_size = 0; @@ -2024,42 +2040,52 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst) fs_reg payload2 = retype(bld.move_to_vgrf(src, src_comps), BRW_REGISTER_TYPE_UD); unsigned ex_mlen = src_comps * src_sz * inst->exec_size / REG_SIZE; + unsigned num_components = 0; + bool has_dest = false; switch (inst->opcode) { case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL: + num_components = arg; + has_dest = true; inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD_CMASK, inst->exec_size, LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A64, 1 /* num_coordinates */, - LSC_DATA_SIZE_D32, arg /* num_channels */, + LSC_DATA_SIZE_D32, num_components, false /* transpose */, LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS), true /* has_dest */); break; case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL: + num_components = arg; + has_dest = false; inst->desc = lsc_msg_desc(devinfo, LSC_OP_STORE_CMASK, inst->exec_size, LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A64, 1 /* num_coordinates */, - LSC_DATA_SIZE_D32, arg /* num_channels */, + LSC_DATA_SIZE_D32, num_components, false /* transpose */, LSC_CACHE(devinfo, STORE, L1STATE_L3MOCS), false /* has_dest */); break; case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL: + num_components = 1; + has_dest = true; inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD, inst->exec_size, LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A64, 1 /* num_coordinates */, lsc_bits_to_data_size(arg), - 1 /* num_channels */, + num_components, false /* transpose */, LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS), true /* has_dest */); break; case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: + num_components = 1; + has_dest = false; inst->desc = lsc_msg_desc(devinfo, LSC_OP_STORE, inst->exec_size, LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A64, 1 /* num_coordinates */, lsc_bits_to_data_size(arg), - 1 /* num_channels */, + num_components, false /* transpose */, LSC_CACHE(devinfo, STORE, L1STATE_L3MOCS), false /* has_dest */); @@ -2071,11 +2097,13 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst) * cache. */ enum lsc_opcode opcode = (enum lsc_opcode) arg; + num_components = 1; + has_dest = !inst->dst.is_null(); inst->desc = lsc_msg_desc(devinfo, opcode, inst->exec_size, LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A64, 1 /* num_coordinates */, lsc_bits_to_data_size(dst_sz * 8), - 1 /* num_channels */, + num_components, false /* transpose */, LSC_CACHE(devinfo, STORE, L1UC_L3WB), !inst->dst.is_null()); @@ -2083,6 +2111,8 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst) } case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL: case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL: + num_components = arg; + has_dest = true; inst->exec_size = 1; inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD, @@ -2091,12 +2121,14 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst) LSC_ADDR_SIZE_A64, 1 /* num_coordinates */, LSC_DATA_SIZE_D32, - arg /* num_channels */, + num_components, true /* transpose */, LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS), true /* has_dest */); break; case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL: + num_components = arg; + has_dest = false; inst->exec_size = 1; inst->desc = lsc_msg_desc(devinfo, LSC_OP_STORE, @@ -2105,7 +2137,7 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst) LSC_ADDR_SIZE_A64, 1 /* num_coordinates */, LSC_DATA_SIZE_D32, - arg /* num_channels */, + num_components, true /* transpose */, LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS), false /* has_dest */); @@ -2120,12 +2152,15 @@ lower_lsc_a64_logical_send(const fs_builder &bld, fs_inst *inst) /* Update the original instruction. */ inst->opcode = SHADER_OPCODE_SEND; - inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc); + inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A64, inst->exec_size); inst->ex_mlen = ex_mlen; inst->header_size = 0; inst->send_has_side_effects = has_side_effects; inst->send_is_volatile = !has_side_effects; - inst->size_written = lsc_msg_desc_dest_len(devinfo, inst->desc) * REG_SIZE; + + inst->size_written = !has_dest ? 0 : + lsc_msg_dest_len(devinfo, lsc_msg_desc_data_size(devinfo, inst->desc), + inst->exec_size * num_components) * REG_SIZE; /* Set up SFID and descriptors */ inst->sfid = GFX12_SFID_UGM; @@ -2306,7 +2341,7 @@ lower_lsc_varying_pull_constant_logical_send(const fs_builder &bld, false /* transpose */, LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS), true /* has_dest */); - inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc); + inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, inst->exec_size); setup_lsc_surface_descriptors(bld, inst, inst->desc, surface.file != BAD_FILE ? @@ -2321,7 +2356,7 @@ lower_lsc_varying_pull_constant_logical_send(const fs_builder &bld, false /* transpose */, LSC_CACHE(devinfo, LOAD, L1STATE_L3MOCS), true /* has_dest */); - inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc); + inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, inst->exec_size); setup_lsc_surface_descriptors(bld, inst, inst->desc, surface.file != BAD_FILE ? @@ -2936,7 +2971,7 @@ brw_fs_lower_uniform_pull_constant_loads(fs_visitor &s) /* Update the original instruction. */ inst->opcode = SHADER_OPCODE_SEND; - inst->mlen = lsc_msg_desc_src0_len(devinfo, inst->desc); + inst->mlen = lsc_msg_addr_len(devinfo, LSC_ADDR_SIZE_A32, 1); inst->send_ex_bso = surface_handle.file != BAD_FILE && s.compiler->extended_bindless_surface_offset; inst->ex_mlen = 0;