intel: Move the D16 workarounds out of ISL
Implement the workarounds in anv and iris instead.

Before this commit, ISL unconditionally modified workaround registers while filling out depth stencil state. To account for this, drivers unconditionally stalled prior to emitting depth stencil packets. This hurt performance.

By having the drivers perform the workarounds, they can choose when to modify the relevant registers. The drivers now avoid emitting the workaround for NULL depth buffers. This reduces stalls and leads to better performance.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> (the ISL/Anv bits)
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> (the Iris bits)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11454>
This commit is contained in:
@@ -274,21 +274,9 @@ iris_blorp_exec(struct blorp_batch *blorp_batch,
|
||||
PIPE_CONTROL_STALL_AT_SCOREBOARD);
|
||||
#endif
|
||||
|
||||
#if GFX_VERx10 == 120
|
||||
if (!(blorp_batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL)) {
|
||||
/* Wa_14010455700
|
||||
*
|
||||
* ISL will change some CHICKEN registers depending on the depth surface
|
||||
* format, along with emitting the depth and stencil packets. In that
|
||||
* case, we want to do a depth flush and stall, so the pipeline is not
|
||||
* using these settings while we change the registers.
|
||||
*/
|
||||
iris_emit_end_of_pipe_sync(batch,
|
||||
"Workaround: Stop pipeline for 14010455700",
|
||||
PIPE_CONTROL_DEPTH_STALL |
|
||||
PIPE_CONTROL_DEPTH_CACHE_FLUSH);
|
||||
}
|
||||
#endif
|
||||
if (params->depth.enabled &&
|
||||
!(blorp_batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL))
|
||||
genX(emit_depth_state_workarounds)(ice, batch, &params->depth.surf);
|
||||
|
||||
/* Flush the render cache in cases where the same surface is used with
|
||||
* different aux modes, which can lead to GPU hangs. Invalidation of
|
||||
|
||||
@@ -1140,8 +1140,7 @@ struct iris_depth_buffer_state {
|
||||
uint32_t packets[GENX(3DSTATE_DEPTH_BUFFER_length) +
|
||||
GENX(3DSTATE_STENCIL_BUFFER_length) +
|
||||
GENX(3DSTATE_HIER_DEPTH_BUFFER_length) +
|
||||
GENX(3DSTATE_CLEAR_PARAMS_length) +
|
||||
GENX(MI_LOAD_REGISTER_IMM_length) * 2];
|
||||
GENX(3DSTATE_CLEAR_PARAMS_length)];
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -6252,21 +6251,11 @@ iris_upload_dirty_render_state(struct iris_context *ice,
|
||||
}
|
||||
}
|
||||
|
||||
#if GFX_VERx10 == 120
|
||||
/* Wa_14010455700
|
||||
*
|
||||
* ISL will change some CHICKEN registers depending on the depth surface
|
||||
* format, along with emitting the depth and stencil packets. In that
|
||||
* case, we want to do a depth flush and stall, so the pipeline is not
|
||||
* using these settings while we change the registers.
|
||||
*/
|
||||
iris_emit_end_of_pipe_sync(batch,
|
||||
"Workaround: Stop pipeline for 14010455700",
|
||||
PIPE_CONTROL_DEPTH_STALL |
|
||||
PIPE_CONTROL_DEPTH_CACHE_FLUSH);
|
||||
#endif
|
||||
|
||||
iris_batch_emit(batch, cso_z->packets, batch->screen->isl_dev.ds.size);
|
||||
|
||||
if (zres)
|
||||
genX(emit_depth_state_workarounds)(ice, batch, &zres->surf);
|
||||
|
||||
if (GFX_VER >= 12) {
|
||||
/* Wa_1408224581
|
||||
*
|
||||
|
||||
@@ -266,10 +266,6 @@ isl_device_init(struct isl_device *dev,
|
||||
dev->ds.hiz_offset = 0;
|
||||
}
|
||||
|
||||
if (ISL_GFX_VERX10(dev) == 120) {
|
||||
dev->ds.size += GFX12_MI_LOAD_REGISTER_IMM_length * 4 * 2;
|
||||
}
|
||||
|
||||
isl_device_setup_mocs(dev);
|
||||
}
|
||||
|
||||
|
||||
@@ -304,50 +304,6 @@ isl_genX(emit_depth_stencil_hiz_s)(const struct isl_device *dev, void *batch,
|
||||
GENX(3DSTATE_HIER_DEPTH_BUFFER_pack)(NULL, dw, &hiz);
|
||||
dw += GENX(3DSTATE_HIER_DEPTH_BUFFER_length);
|
||||
|
||||
#if GFX_VERx10 == 120
|
||||
/* Wa_14010455700
|
||||
*
|
||||
* To avoid sporadic corruptions “Set 0x7010[9] when Depth Buffer Surface
|
||||
* Format is D16_UNORM , surface type is not NULL & 1X_MSAA”.
|
||||
*/
|
||||
bool enable_14010455700 =
|
||||
info->depth_surf && info->depth_surf->samples == 1 &&
|
||||
db.SurfaceType != SURFTYPE_NULL && db.SurfaceFormat == D16_UNORM;
|
||||
struct GENX(COMMON_SLICE_CHICKEN1) chicken1 = {
|
||||
.HIZPlaneOptimizationdisablebit = enable_14010455700,
|
||||
.HIZPlaneOptimizationdisablebitMask = true,
|
||||
};
|
||||
uint32_t chicken1_dw;
|
||||
GENX(COMMON_SLICE_CHICKEN1_pack)(NULL, &chicken1_dw, &chicken1);
|
||||
|
||||
struct GENX(MI_LOAD_REGISTER_IMM) lri = {
|
||||
GENX(MI_LOAD_REGISTER_IMM_header),
|
||||
.RegisterOffset = GENX(COMMON_SLICE_CHICKEN1_num),
|
||||
.DataDWord = chicken1_dw,
|
||||
};
|
||||
GENX(MI_LOAD_REGISTER_IMM_pack)(NULL, dw, &lri);
|
||||
dw += GENX(MI_LOAD_REGISTER_IMM_length);
|
||||
|
||||
/* Wa_1806527549
|
||||
*
|
||||
* Set HIZ_CHICKEN (7018h) bit 13 = 1 when depth buffer is D16_UNORM.
|
||||
*/
|
||||
struct GENX(HIZ_CHICKEN) hiz_chicken = {
|
||||
.HZDepthTestLEGEOptimizationDisable = db.SurfaceFormat == D16_UNORM,
|
||||
.HZDepthTestLEGEOptimizationDisableMask = true,
|
||||
};
|
||||
uint32_t hiz_chicken_dw;
|
||||
GENX(HIZ_CHICKEN_pack)(NULL, &hiz_chicken_dw, &hiz_chicken);
|
||||
|
||||
struct GENX(MI_LOAD_REGISTER_IMM) lri2 = {
|
||||
GENX(MI_LOAD_REGISTER_IMM_header),
|
||||
.RegisterOffset = GENX(HIZ_CHICKEN_num),
|
||||
.DataDWord = hiz_chicken_dw,
|
||||
};
|
||||
GENX(MI_LOAD_REGISTER_IMM_pack)(NULL, dw, &lri2);
|
||||
dw += GENX(MI_LOAD_REGISTER_IMM_length);
|
||||
#endif
|
||||
|
||||
GENX(3DSTATE_CLEAR_PARAMS_pack)(NULL, dw, &clear);
|
||||
dw += GENX(3DSTATE_CLEAR_PARAMS_length);
|
||||
#endif
|
||||
|
||||
@@ -264,21 +264,9 @@ genX(blorp_exec)(struct blorp_batch *batch,
|
||||
"before blorp BTI change");
|
||||
#endif
|
||||
|
||||
#if GFX_VERx10 == 120
|
||||
if (!(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL)) {
|
||||
/* Wa_14010455700
|
||||
*
|
||||
* ISL will change some CHICKEN registers depending on the depth surface
|
||||
* format, along with emitting the depth and stencil packets. In that
|
||||
* case, we want to do a depth flush and stall, so the pipeline is not
|
||||
* using these settings while we change the registers.
|
||||
*/
|
||||
cmd_buffer->state.pending_pipe_bits |=
|
||||
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
|
||||
ANV_PIPE_DEPTH_STALL_BIT |
|
||||
ANV_PIPE_END_OF_PIPE_SYNC_BIT;
|
||||
}
|
||||
#endif
|
||||
if (params->depth.enabled &&
|
||||
!(batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL))
|
||||
genX(cmd_buffer_emit_gfx12_depth_wa)(cmd_buffer, &params->depth.surf);
|
||||
|
||||
#if GFX_VER == 7
|
||||
/* The MI_LOAD/STORE_REGISTER_MEM commands which BLORP uses to implement
|
||||
|
||||
@@ -5855,6 +5855,9 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
|
||||
|
||||
isl_emit_depth_stencil_hiz_s(&device->isl_dev, dw, &info);
|
||||
|
||||
if (info.depth_surf)
|
||||
genX(cmd_buffer_emit_gfx12_depth_wa)(cmd_buffer, info.depth_surf);
|
||||
|
||||
if (GFX_VER >= 12) {
|
||||
cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
|
||||
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
|
||||
@@ -6293,22 +6296,6 @@ cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
|
||||
"change RT");
|
||||
#endif
|
||||
|
||||
#if GFX_VERx10 == 120
|
||||
/* Wa_14010455700
|
||||
*
|
||||
* ISL will change some CHICKEN registers depending on the depth surface
|
||||
* format, along with emitting the depth and stencil packets. In that case,
|
||||
* we want to do a depth flush and stall, so the pipeline is not using these
|
||||
* settings while we change the registers.
|
||||
*/
|
||||
anv_add_pending_pipe_bits(cmd_buffer,
|
||||
ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
|
||||
ANV_PIPE_DEPTH_STALL_BIT |
|
||||
ANV_PIPE_END_OF_PIPE_SYNC_BIT,
|
||||
"change DS");
|
||||
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
|
||||
#endif
|
||||
|
||||
cmd_buffer_emit_depth_stencil(cmd_buffer);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user