diff --git a/src/intel/compiler/brw_compile_mesh.cpp b/src/intel/compiler/brw_compile_mesh.cpp index 789f489e039..b2e81518a4c 100644 --- a/src/intel/compiler/brw_compile_mesh.cpp +++ b/src/intel/compiler/brw_compile_mesh.cpp @@ -295,6 +295,7 @@ brw_emit_urb_fence(fs_visitor &s) brw_vec8_grf(0, 0), brw_imm_ud(true), brw_imm_ud(0)); + fence->size_written = REG_SIZE * reg_unit(s.devinfo); fence->sfid = BRW_SFID_URB; /* The logical thing here would likely be a THREADGROUP fence but that's * still failing some tests like in dEQP-VK.mesh_shader.ext.query.* diff --git a/src/intel/compiler/brw_from_nir.cpp b/src/intel/compiler/brw_from_nir.cpp index 0e8b35cd4ed..fb028d1c6d4 100644 --- a/src/intel/compiler/brw_from_nir.cpp +++ b/src/intel/compiler/brw_from_nir.cpp @@ -4969,6 +4969,8 @@ emit_fence(const brw_builder &bld, enum opcode opcode, uint8_t sfid, uint32_t desc, bool commit_enable, uint8_t bti) { + const struct intel_device_info *devinfo = bld.shader->devinfo; + assert(opcode == SHADER_OPCODE_INTERLOCK || opcode == SHADER_OPCODE_MEMORY_FENCE); @@ -4978,6 +4980,7 @@ emit_fence(const brw_builder &bld, enum opcode opcode, brw_imm_ud(bti)); fence->sfid = sfid; fence->desc = desc; + fence->size_written = commit_enable ? REG_SIZE * reg_unit(devinfo) : 0; return dst; } @@ -5938,7 +5941,7 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb, unsigned fence_regs_count = 0; brw_reg fence_regs[4] = {}; - const brw_builder ubld = bld.group(8, 0); + const brw_builder ubld1 = bld.exec_all().group(1, 0); /* A memory barrier with acquire semantics requires us to * guarantee that memory operations of the specified storage @@ -5980,7 +5983,7 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb, if (devinfo->ver >= 12 && (!nir_intrinsic_has_memory_scope(instr) || (nir_intrinsic_memory_semantics(instr) & NIR_MEMORY_ACQUIRE))) { - ubld.exec_all().group(1, 0).SYNC(TGL_SYNC_ALLWR); + ubld1.SYNC(TGL_SYNC_ALLWR); } if (devinfo->has_lsc) { @@ -5989,14 +5992,14 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb, lsc_fence_descriptor_for_intrinsic(devinfo, instr); if (ugm_fence) { fence_regs[fence_regs_count++] = - emit_fence(ubld, opcode, GFX12_SFID_UGM, desc, + emit_fence(ubld1, opcode, GFX12_SFID_UGM, desc, true /* commit_enable */, 0 /* bti; ignored for LSC */); } if (tgm_fence) { fence_regs[fence_regs_count++] = - emit_fence(ubld, opcode, GFX12_SFID_TGM, desc, + emit_fence(ubld1, opcode, GFX12_SFID_TGM, desc, true /* commit_enable */, 0 /* bti; ignored for LSC */); } @@ -6009,10 +6012,10 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb, * Before SLM fence compiler needs to insert SYNC.ALLWR in order * to avoid the SLM data race. */ - ubld.exec_all().group(1, 0).SYNC(TGL_SYNC_ALLWR); + ubld1.SYNC(TGL_SYNC_ALLWR); } fence_regs[fence_regs_count++] = - emit_fence(ubld, opcode, GFX12_SFID_SLM, desc, + emit_fence(ubld1, opcode, GFX12_SFID_SLM, desc, true /* commit_enable */, 0 /* BTI; ignored for LSC */); } @@ -6020,14 +6023,14 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb, if (urb_fence) { assert(opcode == SHADER_OPCODE_MEMORY_FENCE); fence_regs[fence_regs_count++] = - emit_fence(ubld, opcode, BRW_SFID_URB, desc, + emit_fence(ubld1, opcode, BRW_SFID_URB, desc, true /* commit_enable */, 0 /* BTI; ignored for LSC */); } } else if (devinfo->ver >= 11) { if (tgm_fence || ugm_fence || urb_fence) { fence_regs[fence_regs_count++] = - emit_fence(ubld, opcode, GFX7_SFID_DATAPORT_DATA_CACHE, 0, + emit_fence(ubld1, opcode, GFX7_SFID_DATAPORT_DATA_CACHE, 0, true /* commit_enable HSD ES # 1404612949 */, 0 /* BTI = 0 means data cache */); } @@ -6035,7 +6038,7 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb, if (slm_fence) { assert(opcode == SHADER_OPCODE_MEMORY_FENCE); fence_regs[fence_regs_count++] = - emit_fence(ubld, opcode, GFX7_SFID_DATAPORT_DATA_CACHE, 0, + emit_fence(ubld1, opcode, GFX7_SFID_DATAPORT_DATA_CACHE, 0, true /* commit_enable HSD ES # 1404612949 */, GFX7_BTI_SLM); } @@ -6048,7 +6051,7 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb, if (tgm_fence || ugm_fence || slm_fence || urb_fence) { fence_regs[fence_regs_count++] = - emit_fence(ubld, opcode, GFX7_SFID_DATAPORT_DATA_CACHE, 0, + emit_fence(ubld1, opcode, GFX7_SFID_DATAPORT_DATA_CACHE, 0, commit_enable, 0 /* BTI */); } } @@ -6085,9 +6088,9 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb, */ if (instr->intrinsic == nir_intrinsic_end_invocation_interlock || fence_regs_count != 1 || devinfo->has_lsc || force_stall) { - ubld.exec_all().group(1, 0).emit( - FS_OPCODE_SCHEDULING_FENCE, ubld.null_reg_ud(), - fence_regs, fence_regs_count); + ubld1.emit(FS_OPCODE_SCHEDULING_FENCE, + retype(brw_null_reg(), BRW_TYPE_UW), + fence_regs, fence_regs_count); } break; diff --git a/src/intel/compiler/brw_validate.cpp b/src/intel/compiler/brw_validate.cpp index 91092dfaddd..12670dccb16 100644 --- a/src/intel/compiler/brw_validate.cpp +++ b/src/intel/compiler/brw_validate.cpp @@ -314,6 +314,12 @@ brw_validate(const fs_visitor &s) validate_memory_logical(s, inst); break; + case SHADER_OPCODE_MEMORY_FENCE: + case SHADER_OPCODE_INTERLOCK: + fsv_assert(inst->exec_size == 1); + fsv_assert(inst->force_writemask_all); + break; + default: break; } diff --git a/src/intel/compiler/brw_workaround.cpp b/src/intel/compiler/brw_workaround.cpp index a07468c0471..c1f3e00b5a7 100644 --- a/src/intel/compiler/brw_workaround.cpp +++ b/src/intel/compiler/brw_workaround.cpp @@ -110,6 +110,7 @@ brw_workaround_memory_fence_before_eot(fs_visitor &s) dummy_fence->sfid = GFX12_SFID_UGM; dummy_fence->desc = lsc_fence_msg_desc(s.devinfo, LSC_FENCE_TILE, LSC_FLUSH_TYPE_NONE_6, false); + dummy_fence->size_written = REG_SIZE * reg_unit(s.devinfo); ubld.emit(FS_OPCODE_SCHEDULING_FENCE, ubld.null_reg_ud(), dst); progress = true; /* TODO: remove this break if we ever have shader with multiple EOT. */