i965/gen7: Emit workaround flush when changing GS enable state.
v2: Don't go to extra work to avoid extraneous flushes. (Previous experiments in the kernel have suggested that flushing the pipeline when it is already empty is extremely cheap.)

Cc: "10.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
This commit is contained in:
@@ -764,6 +764,7 @@ brwCreateContext(gl_api api,
|
||||
|
||||
brw->prim_restart.in_progress = false;
|
||||
brw->prim_restart.enable_cut_index = false;
|
||||
brw->gs.enabled = false;
|
||||
|
||||
if (brw->gen < 6) {
|
||||
brw->curbe.last_buf = calloc(1, 4096);
|
||||
|
||||
@@ -1300,6 +1300,12 @@ struct brw_context
|
||||
struct {
|
||||
struct brw_stage_state base;
|
||||
struct brw_gs_prog_data *prog_data;
|
||||
|
||||
/**
|
||||
* True if the 3DSTATE_GS command most recently emitted to the 3D
|
||||
* pipeline enabled the GS; false otherwise.
|
||||
*/
|
||||
bool enabled;
|
||||
} gs;
|
||||
|
||||
struct {
|
||||
|
||||
@@ -402,6 +402,21 @@ gen7_blorp_emit_gs_disable(struct brw_context *brw,
|
||||
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
|
||||
/**
|
||||
* From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
|
||||
* Geometry > Geometry Shader > State:
|
||||
*
|
||||
* "Note: Because of corruption in IVB:GT2, software needs to flush the
|
||||
* whole fixed function pipeline when the GS enable changes value in
|
||||
* the 3DSTATE_GS."
|
||||
*
|
||||
* The hardware architects have clarified that in this context "flush the
|
||||
* whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
|
||||
* Stall" bit set.
|
||||
*/
|
||||
if (!brw->is_haswell && brw->gt == 2 && brw->gs.enabled)
|
||||
gen7_emit_cs_stall_flush(brw);
|
||||
|
||||
BEGIN_BATCH(7);
|
||||
OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
|
||||
OUT_BATCH(0);
|
||||
@@ -411,6 +426,7 @@ gen7_blorp_emit_gs_disable(struct brw_context *brw,
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
brw->gs.enabled = false;
|
||||
}
|
||||
|
||||
/* 3DSTATE_STREAMOUT
|
||||
|
||||
@@ -80,6 +80,21 @@ upload_gs_state(struct brw_context *brw)
|
||||
|
||||
gen7_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS);
|
||||
|
||||
/**
|
||||
* From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
|
||||
* Geometry > Geometry Shader > State:
|
||||
*
|
||||
* "Note: Because of corruption in IVB:GT2, software needs to flush the
|
||||
* whole fixed function pipeline when the GS enable changes value in
|
||||
* the 3DSTATE_GS."
|
||||
*
|
||||
* The hardware architects have clarified that in this context "flush the
|
||||
* whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
|
||||
* Stall" bit set.
|
||||
*/
|
||||
if (!brw->is_haswell && brw->gt == 2 && brw->gs.enabled != active)
|
||||
gen7_emit_cs_stall_flush(brw);
|
||||
|
||||
if (active) {
|
||||
BEGIN_BATCH(7);
|
||||
OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
|
||||
@@ -176,6 +191,7 @@ upload_gs_state(struct brw_context *brw)
|
||||
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
}
|
||||
brw->gs.enabled = active;
|
||||
}
|
||||
|
||||
const struct brw_tracked_state gen7_gs_state = {
|
||||
|
||||
@@ -122,28 +122,8 @@ gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size,
|
||||
*
|
||||
* No such restriction exists for Haswell.
|
||||
*/
|
||||
if (!brw->is_haswell) {
|
||||
BEGIN_BATCH(4);
|
||||
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
|
||||
/* From p61 of the Ivy Bridge PRM (1.10.4 PIPE_CONTROL Command: DW1[20]
|
||||
* CS Stall):
|
||||
*
|
||||
* One of the following must also be set:
|
||||
* - Render Target Cache Flush Enable ([12] of DW1)
|
||||
* - Depth Cache Flush Enable ([0] of DW1)
|
||||
* - Stall at Pixel Scoreboard ([1] of DW1)
|
||||
* - Depth Stall ([13] of DW1)
|
||||
* - Post-Sync Operation ([13] of DW1)
|
||||
*
|
||||
* We choose to do a Post-Sync Operation (Write Immediate Data), since
|
||||
* it seems like it will incur the least additional performance penalty.
|
||||
*/
|
||||
OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
|
||||
OUT_RELOC(brw->batch.workaround_bo,
|
||||
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
|
||||
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
}
|
||||
if (!brw->is_haswell)
|
||||
gen7_emit_cs_stall_flush(brw);
|
||||
}
|
||||
|
||||
const struct brw_tracked_state gen7_push_constant_space = {
|
||||
|
||||
@@ -511,6 +511,36 @@ gen7_emit_vs_workaround_flush(struct brw_context *brw)
|
||||
ADVANCE_BATCH();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Emit a PIPE_CONTROL command for gen7 with the CS Stall bit set.
*
* The command occupies four batch dwords: the PIPE_CONTROL header, the flag
* dword (CS Stall plus a Write Immediate Data post-sync operation), a
* relocation against brw->batch.workaround_bo, and a trailing zero dword.
*
* This function performs no hardware-generation check of its own; it emits
* the command unconditionally, so any such decision is left to the caller.
|
||||
*/
|
||||
void
|
||||
gen7_emit_cs_stall_flush(struct brw_context *brw)
|
||||
{
|
||||
BEGIN_BATCH(4);
|
||||
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
|
||||
/* From p61 of the Ivy Bridge PRM (1.10.4 PIPE_CONTROL Command: DW1[20]
|
||||
* CS Stall):
|
||||
|
||||
*
|
||||
|
||||
* One of the following must also be set:
|
||||
|
||||
* - Render Target Cache Flush Enable ([12] of DW1)
|
||||
|
||||
* - Depth Cache Flush Enable ([0] of DW1)
|
||||
|
||||
* - Stall at Pixel Scoreboard ([1] of DW1)
|
||||
|
||||
* - Depth Stall ([13] of DW1)
|
||||
|
||||
* - Post-Sync Operation ([13] of DW1)
|
||||
|
||||
*
|
||||
|
||||
* We choose to do a Post-Sync Operation (Write Immediate Data), since
|
||||
|
||||
* it seems like it will incur the least additional performance penalty.
|
||||
|
||||
*/
|
||||
OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
|
||||
/* NOTE(review): presumably the address the post-sync immediate write
* lands at, i.e. the workaround buffer object -- confirm.
*/
OUT_RELOC(brw->batch.workaround_bo,
|
||||
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
|
||||
/* NOTE(review): presumably the immediate data value being written
* (zero) -- confirm against the PIPE_CONTROL layout.
*/
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Emits a PIPE_CONTROL with a non-zero post-sync operation, for
|
||||
* implementing two workarounds on gen6. From section 1.4.7.1
|
||||
|
||||
@@ -59,6 +59,7 @@ void intel_batchbuffer_emit_mi_flush(struct brw_context *brw);
|
||||
void intel_emit_post_sync_nonzero_flush(struct brw_context *brw);
|
||||
void intel_emit_depth_stall_flushes(struct brw_context *brw);
|
||||
void gen7_emit_vs_workaround_flush(struct brw_context *brw);
|
||||
void gen7_emit_cs_stall_flush(struct brw_context *brw);
|
||||
|
||||
static INLINE uint32_t float_as_int(float f)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user