i965: Port STATE_BASE_ADDRESS to genxml and fix bugs
This largely copies crocus's code for this (but with Gfx9+ handling). This version also fixes missing MOCS settings on several platforms, which we hadn't noticed were missing. Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13480>
This commit is contained in:
committed by
Marge Bot
parent
0a64007676
commit
148ea65ee1
@@ -710,6 +710,7 @@ struct brw_context
|
||||
void (*emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags,
|
||||
struct brw_bo *bo, uint32_t offset,
|
||||
uint64_t imm);
|
||||
void (*emit_state_base_address)(struct brw_context *brw);
|
||||
} vtbl;
|
||||
|
||||
struct brw_bufmgr *bufmgr;
|
||||
|
||||
@@ -726,199 +726,3 @@ brw_upload_invariant_state(struct brw_context *brw)
|
||||
ADVANCE_BATCH();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Define the base addresses which some state is referenced from.
|
||||
*
|
||||
* This allows us to avoid having to emit relocations for the objects,
|
||||
* and is actually required for binding table pointers on gfx6.
|
||||
*
|
||||
* Surface state base address covers binding table pointers and
|
||||
* surface state objects, but not the surfaces that the surface state
|
||||
* objects point to.
|
||||
*/
|
||||
void
|
||||
brw_upload_state_base_address(struct brw_context *brw)
|
||||
{
|
||||
const struct intel_device_info *devinfo = &brw->screen->devinfo;
|
||||
|
||||
if (brw->batch.state_base_address_emitted)
|
||||
return;
|
||||
|
||||
/* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
|
||||
* vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
|
||||
* programmed prior to STATE_BASE_ADDRESS.
|
||||
*
|
||||
* However, given that the instruction SBA (general state base
|
||||
* address) on this chipset is always set to 0 across X and GL,
|
||||
* maybe this isn't required for us in particular.
|
||||
*/
|
||||
|
||||
uint32_t mocs = brw_mocs(&brw->isl_dev, NULL);
|
||||
|
||||
if (devinfo->ver >= 6) {
|
||||
const unsigned dc_flush =
|
||||
devinfo->ver >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0;
|
||||
|
||||
/* Emit a render target cache flush.
|
||||
*
|
||||
* This isn't documented anywhere in the PRM. However, it seems to be
|
||||
* necessary prior to changing the surface state base address. We've
|
||||
* seen issues in Vulkan where we get GPU hangs when using multi-level
|
||||
* command buffers which clear depth, reset state base address, and then
|
||||
* go render stuff.
|
||||
*
|
||||
* Normally, in GL, we would trust the kernel to do sufficient stalls
|
||||
* and flushes prior to executing our batch. However, it doesn't seem
|
||||
* as if the kernel's flushing is always sufficient and we don't want to
|
||||
* rely on it.
|
||||
*
|
||||
* We make this an end-of-pipe sync instead of a normal flush because we
|
||||
* do not know the current status of the GPU. On Haswell at least,
|
||||
* having a fast-clear operation in flight at the same time as a normal
|
||||
* rendering operation can cause hangs. Since the kernel's flushing is
|
||||
* insufficient, we need to ensure that any rendering operations from
|
||||
* other processes are definitely complete before we try to do our own
|
||||
* rendering. It's a bit of a big hammer but it appears to work.
|
||||
*/
|
||||
brw_emit_end_of_pipe_sync(brw,
|
||||
PIPE_CONTROL_RENDER_TARGET_FLUSH |
|
||||
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
|
||||
dc_flush);
|
||||
}
|
||||
|
||||
if (devinfo->ver >= 8) {
|
||||
/* STATE_BASE_ADDRESS has issues with 48-bit address spaces. If the
|
||||
* address + size as seen by STATE_BASE_ADDRESS overflows 48 bits,
|
||||
* the GPU appears to treat all accesses to the buffer as being out
|
||||
* of bounds and returns zero. To work around this, we pin all SBAs
|
||||
* to the bottom 4GB.
|
||||
*/
|
||||
int pkt_len = devinfo->ver >= 10 ? 22 : (devinfo->ver >= 9 ? 19 : 16);
|
||||
|
||||
BEGIN_BATCH(pkt_len);
|
||||
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (pkt_len - 2));
|
||||
/* General state base address: stateless DP read/write requests */
|
||||
OUT_BATCH(mocs << 4 | 1);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(mocs << 16);
|
||||
/* Surface state base address: */
|
||||
OUT_RELOC64(brw->batch.state.bo, RELOC_32BIT, mocs << 4 | 1);
|
||||
/* Dynamic state base address: */
|
||||
OUT_RELOC64(brw->batch.state.bo, RELOC_32BIT, mocs << 4 | 1);
|
||||
/* Indirect object base address: MEDIA_OBJECT data */
|
||||
OUT_BATCH(mocs << 4 | 1);
|
||||
OUT_BATCH(0);
|
||||
/* Instruction base address: shader kernels (incl. SIP) */
|
||||
OUT_RELOC64(brw->cache.bo, RELOC_32BIT, mocs << 4 | 1);
|
||||
/* General state buffer size */
|
||||
OUT_BATCH(0xfffff001);
|
||||
/* Dynamic state buffer size */
|
||||
OUT_BATCH(ALIGN(MAX_STATE_SIZE, 4096) | 1);
|
||||
/* Indirect object upper bound */
|
||||
OUT_BATCH(0xfffff001);
|
||||
/* Instruction access upper bound */
|
||||
OUT_BATCH(ALIGN(brw->cache.bo->size, 4096) | 1);
|
||||
if (devinfo->ver >= 9) {
|
||||
OUT_BATCH(1);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
}
|
||||
if (devinfo->ver >= 10) {
|
||||
OUT_BATCH(1);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
}
|
||||
ADVANCE_BATCH();
|
||||
} else if (devinfo->ver >= 6) {
|
||||
BEGIN_BATCH(10);
|
||||
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
|
||||
OUT_BATCH(mocs << 8 | /* General State Memory Object Control State */
|
||||
mocs << 4 | /* Stateless Data Port Access Memory Object Control State */
|
||||
1); /* General State Base Address Modify Enable */
|
||||
/* Surface state base address:
|
||||
* BINDING_TABLE_STATE
|
||||
* SURFACE_STATE
|
||||
*/
|
||||
OUT_RELOC(brw->batch.state.bo, 0, 1);
|
||||
/* Dynamic state base address:
|
||||
* SAMPLER_STATE
|
||||
* SAMPLER_BORDER_COLOR_STATE
|
||||
* CLIP, SF, WM/CC viewport state
|
||||
* COLOR_CALC_STATE
|
||||
* DEPTH_STENCIL_STATE
|
||||
* BLEND_STATE
|
||||
* Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
|
||||
* Disable is clear, which we rely on)
|
||||
*/
|
||||
OUT_RELOC(brw->batch.state.bo, 0, 1);
|
||||
|
||||
OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
|
||||
|
||||
/* Instruction base address: shader kernels (incl. SIP) */
|
||||
OUT_RELOC(brw->cache.bo, 0, 1);
|
||||
|
||||
OUT_BATCH(1); /* General state upper bound */
|
||||
/* Dynamic state upper bound. Although the documentation says that
|
||||
* programming it to zero will cause it to be ignored, that is a lie.
|
||||
* If this isn't programmed to a real bound, the sampler border color
|
||||
* pointer is rejected, causing border color to mysteriously fail.
|
||||
*/
|
||||
OUT_BATCH(0xfffff001);
|
||||
OUT_BATCH(1); /* Indirect object upper bound */
|
||||
OUT_BATCH(1); /* Instruction access upper bound */
|
||||
ADVANCE_BATCH();
|
||||
} else if (devinfo->ver == 5) {
|
||||
BEGIN_BATCH(8);
|
||||
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
|
||||
OUT_BATCH(1); /* General state base address */
|
||||
OUT_RELOC(brw->batch.state.bo, 0, 1); /* Surface state base address */
|
||||
OUT_BATCH(1); /* Indirect object base address */
|
||||
OUT_RELOC(brw->cache.bo, 0, 1); /* Instruction base address */
|
||||
OUT_BATCH(0xfffff001); /* General state upper bound */
|
||||
OUT_BATCH(1); /* Indirect object upper bound */
|
||||
OUT_BATCH(1); /* Instruction access upper bound */
|
||||
ADVANCE_BATCH();
|
||||
} else {
|
||||
BEGIN_BATCH(6);
|
||||
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
|
||||
OUT_BATCH(1); /* General state base address */
|
||||
OUT_RELOC(brw->batch.state.bo, 0, 1); /* Surface state base address */
|
||||
OUT_BATCH(1); /* Indirect object base address */
|
||||
OUT_BATCH(1); /* General state upper bound */
|
||||
OUT_BATCH(1); /* Indirect object upper bound */
|
||||
ADVANCE_BATCH();
|
||||
}
|
||||
|
||||
if (devinfo->ver >= 6) {
|
||||
brw_emit_pipe_control_flush(brw,
|
||||
PIPE_CONTROL_INSTRUCTION_INVALIDATE |
|
||||
PIPE_CONTROL_STATE_CACHE_INVALIDATE |
|
||||
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
|
||||
}
|
||||
|
||||
/* According to section 3.6.1 of VOL1 of the 965 PRM,
|
||||
* STATE_BASE_ADDRESS updates require a reissue of:
|
||||
*
|
||||
* 3DSTATE_PIPELINE_POINTERS
|
||||
* 3DSTATE_BINDING_TABLE_POINTERS
|
||||
* MEDIA_STATE_POINTERS
|
||||
*
|
||||
* and this continues through Ironlake. The Sandy Bridge PRM, vol
|
||||
* 1 part 1 says that the following packets must be reissued:
|
||||
*
|
||||
* 3DSTATE_CC_POINTERS
|
||||
* 3DSTATE_BINDING_TABLE_POINTERS
|
||||
* 3DSTATE_SAMPLER_STATE_POINTERS
|
||||
* 3DSTATE_VIEWPORT_STATE_POINTERS
|
||||
* MEDIA_STATE_POINTERS
|
||||
*
|
||||
* Those are always reissued following SBA updates anyway (new
|
||||
* batch time), except in the case of the program cache BO
|
||||
* changing. Having a separate state flag makes the sequence more
|
||||
* obvious.
|
||||
*/
|
||||
|
||||
brw->ctx.NewDriverState |= BRW_NEW_STATE_BASE_ADDRESS;
|
||||
brw->batch.state_base_address_emitted = true;
|
||||
}
|
||||
|
||||
@@ -142,8 +142,6 @@ void brw_upload_invariant_state(struct brw_context *brw);
|
||||
uint32_t
|
||||
brw_depthbuffer_format(struct brw_context *brw);
|
||||
|
||||
void brw_upload_state_base_address(struct brw_context *brw);
|
||||
|
||||
/* gfx8_depth_state.c */
|
||||
void gfx8_write_pma_stall_bits(struct brw_context *brw,
|
||||
uint32_t pma_stall_bits);
|
||||
|
||||
@@ -683,7 +683,7 @@ brw_upload_pipeline_state(struct brw_context *brw,
|
||||
brw_upload_programs(brw, pipeline);
|
||||
merge_ctx_state(brw, &state);
|
||||
|
||||
brw_upload_state_base_address(brw);
|
||||
brw->vtbl.emit_state_base_address(brw);
|
||||
|
||||
const struct brw_tracked_state *atoms =
|
||||
brw_get_pipeline_atoms(brw, pipeline);
|
||||
|
||||
@@ -337,7 +337,7 @@ retry:
|
||||
brw_emit_post_sync_nonzero_flush(brw);
|
||||
#endif
|
||||
|
||||
brw_upload_state_base_address(brw);
|
||||
brw->vtbl.emit_state_base_address(brw);
|
||||
|
||||
#if GFX_VER >= 8
|
||||
gfx7_l3_state.emit(brw);
|
||||
|
||||
@@ -97,6 +97,162 @@ emit_lri(struct brw_context *brw, uint32_t reg, uint32_t imm)
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Define the base addresses which some state is referenced from.
|
||||
*
|
||||
* This allows us to avoid having to emit relocations for the objects,
|
||||
* and is actually required for binding table pointers on Gfx6.
|
||||
*
|
||||
* Surface state base address covers binding table pointers and surface state
|
||||
* objects, but not the surfaces that the surface state objects point to.
|
||||
*/
|
||||
static void
|
||||
genX(emit_state_base_address)(struct brw_context *brw)
|
||||
{
|
||||
if (brw->batch.state_base_address_emitted)
|
||||
return;
|
||||
|
||||
/* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
|
||||
* vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
|
||||
* programmed prior to STATE_BASE_ADDRESS.
|
||||
*
|
||||
* However, given that the instruction SBA (general state base
|
||||
* address) on this chipset is always set to 0 across X and GL,
|
||||
* maybe this isn't required for us in particular.
|
||||
*/
|
||||
|
||||
UNUSED uint32_t mocs = brw_mocs(&brw->isl_dev, NULL);
|
||||
|
||||
/* Flush before updating STATE_BASE_ADDRESS */
|
||||
#if GFX_VER >= 6
|
||||
const unsigned dc_flush =
|
||||
GFX_VER >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0;
|
||||
|
||||
/* Emit a render target cache flush.
|
||||
*
|
||||
* This isn't documented anywhere in the PRM. However, it seems to be
|
||||
* necessary prior to changing the surface state base address. We've
|
||||
* seen issues in Vulkan where we get GPU hangs when using multi-level
|
||||
* command buffers which clear depth, reset state base address, and then
|
||||
* go render stuff.
|
||||
*
|
||||
* Normally, in GL, we would trust the kernel to do sufficient stalls
|
||||
* and flushes prior to executing our batch. However, it doesn't seem
|
||||
* as if the kernel's flushing is always sufficient and we don't want to
|
||||
* rely on it.
|
||||
*
|
||||
* We make this an end-of-pipe sync instead of a normal flush because we
|
||||
* do not know the current status of the GPU. On Haswell at least,
|
||||
* having a fast-clear operation in flight at the same time as a normal
|
||||
* rendering operation can cause hangs. Since the kernel's flushing is
|
||||
* insufficient, we need to ensure that any rendering operations from
|
||||
* other processes are definitely complete before we try to do our own
|
||||
* rendering. It's a bit of a big hammer but it appears to work.
|
||||
*/
|
||||
brw_emit_end_of_pipe_sync(brw,
|
||||
PIPE_CONTROL_RENDER_TARGET_FLUSH |
|
||||
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
|
||||
dc_flush);
|
||||
#endif
|
||||
|
||||
brw_batch_emit(brw, GENX(STATE_BASE_ADDRESS), sba) {
|
||||
/* Set base addresses */
|
||||
sba.GeneralStateBaseAddressModifyEnable = true;
|
||||
|
||||
#if GFX_VER >= 6
|
||||
sba.DynamicStateBaseAddressModifyEnable = true;
|
||||
sba.DynamicStateBaseAddress = ro_bo(brw->batch.state.bo, 0);
|
||||
#endif
|
||||
|
||||
sba.SurfaceStateBaseAddressModifyEnable = true;
|
||||
sba.SurfaceStateBaseAddress = ro_bo(brw->batch.state.bo, 0);
|
||||
|
||||
sba.IndirectObjectBaseAddressModifyEnable = true;
|
||||
|
||||
#if GFX_VER >= 5
|
||||
sba.InstructionBaseAddressModifyEnable = true;
|
||||
sba.InstructionBaseAddress = ro_bo(brw->cache.bo, 0);
|
||||
#endif
|
||||
|
||||
/* Set buffer sizes on Gfx8+ or upper bounds on Gfx4-7 */
|
||||
#if GFX_VER >= 8
|
||||
sba.GeneralStateBufferSize = 0xfffff;
|
||||
sba.IndirectObjectBufferSize = 0xfffff;
|
||||
sba.InstructionBufferSize = 0xfffff;
|
||||
sba.DynamicStateBufferSize = MAX_STATE_SIZE;
|
||||
|
||||
sba.GeneralStateBufferSizeModifyEnable = true;
|
||||
sba.DynamicStateBufferSizeModifyEnable = true;
|
||||
sba.IndirectObjectBufferSizeModifyEnable = true;
|
||||
sba.InstructionBuffersizeModifyEnable = true;
|
||||
#else
|
||||
sba.GeneralStateAccessUpperBoundModifyEnable = true;
|
||||
sba.IndirectObjectAccessUpperBoundModifyEnable = true;
|
||||
|
||||
#if GFX_VER >= 5
|
||||
sba.InstructionAccessUpperBoundModifyEnable = true;
|
||||
#endif
|
||||
|
||||
#if GFX_VER >= 6
|
||||
/* Dynamic state upper bound. Although the documentation says that
|
||||
* programming it to zero will cause it to be ignored, that is a lie.
|
||||
* If this isn't programmed to a real bound, the sampler border color
|
||||
* pointer is rejected, causing border color to mysteriously fail.
|
||||
*/
|
||||
sba.DynamicStateAccessUpperBound = ro_bo(NULL, 0xfffff000);
|
||||
sba.DynamicStateAccessUpperBoundModifyEnable = true;
|
||||
#else
|
||||
/* Same idea but using General State Base Address on Gfx4-5 */
|
||||
sba.GeneralStateAccessUpperBound = ro_bo(NULL, 0xfffff000);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if GFX_VER >= 6
|
||||
/* The hardware appears to pay attention to the MOCS fields even
|
||||
* if you don't set the "Address Modify Enable" bit for the base.
|
||||
*/
|
||||
sba.GeneralStateMOCS = mocs;
|
||||
sba.StatelessDataPortAccessMOCS = mocs;
|
||||
sba.DynamicStateMOCS = mocs;
|
||||
sba.IndirectObjectMOCS = mocs;
|
||||
sba.InstructionMOCS = mocs;
|
||||
sba.SurfaceStateMOCS = mocs;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Flush after updating STATE_BASE_ADDRESS */
|
||||
#if GFX_VER >= 6
|
||||
brw_emit_pipe_control_flush(brw,
|
||||
PIPE_CONTROL_INSTRUCTION_INVALIDATE |
|
||||
PIPE_CONTROL_STATE_CACHE_INVALIDATE |
|
||||
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
|
||||
#endif
|
||||
|
||||
/* According to section 3.6.1 of VOL1 of the 965 PRM,
|
||||
* STATE_BASE_ADDRESS updates require a reissue of:
|
||||
*
|
||||
* 3DSTATE_PIPELINE_POINTERS
|
||||
* 3DSTATE_BINDING_TABLE_POINTERS
|
||||
* MEDIA_STATE_POINTERS
|
||||
*
|
||||
* and this continues through Ironlake. The Sandy Bridge PRM, vol
|
||||
* 1 part 1 says that the following packets must be reissued:
|
||||
*
|
||||
* 3DSTATE_CC_POINTERS
|
||||
* 3DSTATE_BINDING_TABLE_POINTERS
|
||||
* 3DSTATE_SAMPLER_STATE_POINTERS
|
||||
* 3DSTATE_VIEWPORT_STATE_POINTERS
|
||||
* MEDIA_STATE_POINTERS
|
||||
*
|
||||
* Those are always reissued following SBA updates anyway (new
|
||||
* batch time), except in the case of the program cache BO
|
||||
* changing. Having a separate state flag makes the sequence more
|
||||
* obvious.
|
||||
*/
|
||||
brw->ctx.NewDriverState |= BRW_NEW_STATE_BASE_ADDRESS;
|
||||
brw->batch.state_base_address_emitted = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Polygon stipple packet
|
||||
*/
|
||||
@@ -5918,5 +6074,7 @@ genX(init_atoms)(struct brw_context *brw)
|
||||
brw->vtbl.emit_compute_walker = genX(emit_gpgpu_walker);
|
||||
#endif
|
||||
|
||||
brw->vtbl.emit_state_base_address = genX(emit_state_base_address);
|
||||
|
||||
assert(brw->screen->devinfo.verx10 == GFX_VERx10);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user