brw: Use correct builder size for MEMORY_FENCE/INTERLOCK virtual opcodes
brw_memory_fence() overrides the instructions generated by the MEMORY_FENCE or INTERLOCK opcodes to be force_writemask_all with exec_size == 1. But the IR was emitting it in SIMD8 (regardless of dispatch width).

Instead, just emit the IR as SIMD1/NoMask so the IR matches what we actually generate. Have size_written indicate that the entire destination is written, however, as it is ultimately going to be a SEND that writes a whole register.

We were also using a UD register for the source of FS_OPCODE_SCHEDULING_FENCE when the generator overrides it to UW, so just specify UW in the IR as well so that they line up.

Also add validation for MEMORY_FENCE/INTERLOCK to check that we've gotten the exec_size and masking right in the IR.

Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33297>
This commit is contained in:
committed by
Marge Bot
parent
accef5e8f5
commit
c0a32af125
@@ -295,6 +295,7 @@ brw_emit_urb_fence(fs_visitor &s)
|
||||
brw_vec8_grf(0, 0),
|
||||
brw_imm_ud(true),
|
||||
brw_imm_ud(0));
|
||||
fence->size_written = REG_SIZE * reg_unit(s.devinfo);
|
||||
fence->sfid = BRW_SFID_URB;
|
||||
/* The logical thing here would likely be a THREADGROUP fence but that's
|
||||
* still failing some tests like in dEQP-VK.mesh_shader.ext.query.*
|
||||
|
||||
@@ -4969,6 +4969,8 @@ emit_fence(const brw_builder &bld, enum opcode opcode,
|
||||
uint8_t sfid, uint32_t desc,
|
||||
bool commit_enable, uint8_t bti)
|
||||
{
|
||||
const struct intel_device_info *devinfo = bld.shader->devinfo;
|
||||
|
||||
assert(opcode == SHADER_OPCODE_INTERLOCK ||
|
||||
opcode == SHADER_OPCODE_MEMORY_FENCE);
|
||||
|
||||
@@ -4978,6 +4980,7 @@ emit_fence(const brw_builder &bld, enum opcode opcode,
|
||||
brw_imm_ud(bti));
|
||||
fence->sfid = sfid;
|
||||
fence->desc = desc;
|
||||
fence->size_written = commit_enable ? REG_SIZE * reg_unit(devinfo) : 0;
|
||||
|
||||
return dst;
|
||||
}
|
||||
@@ -5938,7 +5941,7 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb,
|
||||
unsigned fence_regs_count = 0;
|
||||
brw_reg fence_regs[4] = {};
|
||||
|
||||
const brw_builder ubld = bld.group(8, 0);
|
||||
const brw_builder ubld1 = bld.exec_all().group(1, 0);
|
||||
|
||||
/* A memory barrier with acquire semantics requires us to
|
||||
* guarantee that memory operations of the specified storage
|
||||
@@ -5980,7 +5983,7 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb,
|
||||
if (devinfo->ver >= 12 &&
|
||||
(!nir_intrinsic_has_memory_scope(instr) ||
|
||||
(nir_intrinsic_memory_semantics(instr) & NIR_MEMORY_ACQUIRE))) {
|
||||
ubld.exec_all().group(1, 0).SYNC(TGL_SYNC_ALLWR);
|
||||
ubld1.SYNC(TGL_SYNC_ALLWR);
|
||||
}
|
||||
|
||||
if (devinfo->has_lsc) {
|
||||
@@ -5989,14 +5992,14 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb,
|
||||
lsc_fence_descriptor_for_intrinsic(devinfo, instr);
|
||||
if (ugm_fence) {
|
||||
fence_regs[fence_regs_count++] =
|
||||
emit_fence(ubld, opcode, GFX12_SFID_UGM, desc,
|
||||
emit_fence(ubld1, opcode, GFX12_SFID_UGM, desc,
|
||||
true /* commit_enable */,
|
||||
0 /* bti; ignored for LSC */);
|
||||
}
|
||||
|
||||
if (tgm_fence) {
|
||||
fence_regs[fence_regs_count++] =
|
||||
emit_fence(ubld, opcode, GFX12_SFID_TGM, desc,
|
||||
emit_fence(ubld1, opcode, GFX12_SFID_TGM, desc,
|
||||
true /* commit_enable */,
|
||||
0 /* bti; ignored for LSC */);
|
||||
}
|
||||
@@ -6009,10 +6012,10 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb,
|
||||
* Before SLM fence compiler needs to insert SYNC.ALLWR in order
|
||||
* to avoid the SLM data race.
|
||||
*/
|
||||
ubld.exec_all().group(1, 0).SYNC(TGL_SYNC_ALLWR);
|
||||
ubld1.SYNC(TGL_SYNC_ALLWR);
|
||||
}
|
||||
fence_regs[fence_regs_count++] =
|
||||
emit_fence(ubld, opcode, GFX12_SFID_SLM, desc,
|
||||
emit_fence(ubld1, opcode, GFX12_SFID_SLM, desc,
|
||||
true /* commit_enable */,
|
||||
0 /* BTI; ignored for LSC */);
|
||||
}
|
||||
@@ -6020,14 +6023,14 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb,
|
||||
if (urb_fence) {
|
||||
assert(opcode == SHADER_OPCODE_MEMORY_FENCE);
|
||||
fence_regs[fence_regs_count++] =
|
||||
emit_fence(ubld, opcode, BRW_SFID_URB, desc,
|
||||
emit_fence(ubld1, opcode, BRW_SFID_URB, desc,
|
||||
true /* commit_enable */,
|
||||
0 /* BTI; ignored for LSC */);
|
||||
}
|
||||
} else if (devinfo->ver >= 11) {
|
||||
if (tgm_fence || ugm_fence || urb_fence) {
|
||||
fence_regs[fence_regs_count++] =
|
||||
emit_fence(ubld, opcode, GFX7_SFID_DATAPORT_DATA_CACHE, 0,
|
||||
emit_fence(ubld1, opcode, GFX7_SFID_DATAPORT_DATA_CACHE, 0,
|
||||
true /* commit_enable HSD ES # 1404612949 */,
|
||||
0 /* BTI = 0 means data cache */);
|
||||
}
|
||||
@@ -6035,7 +6038,7 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb,
|
||||
if (slm_fence) {
|
||||
assert(opcode == SHADER_OPCODE_MEMORY_FENCE);
|
||||
fence_regs[fence_regs_count++] =
|
||||
emit_fence(ubld, opcode, GFX7_SFID_DATAPORT_DATA_CACHE, 0,
|
||||
emit_fence(ubld1, opcode, GFX7_SFID_DATAPORT_DATA_CACHE, 0,
|
||||
true /* commit_enable HSD ES # 1404612949 */,
|
||||
GFX7_BTI_SLM);
|
||||
}
|
||||
@@ -6048,7 +6051,7 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb,
|
||||
|
||||
if (tgm_fence || ugm_fence || slm_fence || urb_fence) {
|
||||
fence_regs[fence_regs_count++] =
|
||||
emit_fence(ubld, opcode, GFX7_SFID_DATAPORT_DATA_CACHE, 0,
|
||||
emit_fence(ubld1, opcode, GFX7_SFID_DATAPORT_DATA_CACHE, 0,
|
||||
commit_enable, 0 /* BTI */);
|
||||
}
|
||||
}
|
||||
@@ -6085,9 +6088,9 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb,
|
||||
*/
|
||||
if (instr->intrinsic == nir_intrinsic_end_invocation_interlock ||
|
||||
fence_regs_count != 1 || devinfo->has_lsc || force_stall) {
|
||||
ubld.exec_all().group(1, 0).emit(
|
||||
FS_OPCODE_SCHEDULING_FENCE, ubld.null_reg_ud(),
|
||||
fence_regs, fence_regs_count);
|
||||
ubld1.emit(FS_OPCODE_SCHEDULING_FENCE,
|
||||
retype(brw_null_reg(), BRW_TYPE_UW),
|
||||
fence_regs, fence_regs_count);
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
@@ -314,6 +314,12 @@ brw_validate(const fs_visitor &s)
|
||||
validate_memory_logical(s, inst);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_MEMORY_FENCE:
|
||||
case SHADER_OPCODE_INTERLOCK:
|
||||
fsv_assert(inst->exec_size == 1);
|
||||
fsv_assert(inst->force_writemask_all);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -110,6 +110,7 @@ brw_workaround_memory_fence_before_eot(fs_visitor &s)
|
||||
dummy_fence->sfid = GFX12_SFID_UGM;
|
||||
dummy_fence->desc = lsc_fence_msg_desc(s.devinfo, LSC_FENCE_TILE,
|
||||
LSC_FLUSH_TYPE_NONE_6, false);
|
||||
dummy_fence->size_written = REG_SIZE * reg_unit(s.devinfo);
|
||||
ubld.emit(FS_OPCODE_SCHEDULING_FENCE, ubld.null_reg_ud(), dst);
|
||||
progress = true;
|
||||
/* TODO: remove this break if we ever have shader with multiple EOT. */
|
||||
|
||||
Reference in New Issue
Block a user