diff --git a/src/gallium/drivers/iris/iris_batch.h b/src/gallium/drivers/iris/iris_batch.h index 7f0179002ab..cf34e7f3472 100644 --- a/src/gallium/drivers/iris/iris_batch.h +++ b/src/gallium/drivers/iris/iris_batch.h @@ -162,6 +162,13 @@ struct iris_batch { */ uint64_t coherent_seqnos[NUM_IRIS_DOMAINS][NUM_IRIS_DOMAINS]; + /** + * A vector representing the cache coherency status of the L3. For each + * cache domain i, l3_coherent_seqnos[i] denotes the seqno of the most + * recent flush of that domain which is visible to L3 clients. + */ + uint64_t l3_coherent_seqnos[NUM_IRIS_DOMAINS]; + /** * Sequence number used to track the completion of any subsequent memory * operations in the batch until the next sync boundary. @@ -351,7 +358,12 @@ static inline void iris_batch_mark_flush_sync(struct iris_batch *batch, enum iris_domain access) { - batch->coherent_seqnos[access][access] = batch->next_seqno - 1; + const struct intel_device_info *devinfo = &batch->screen->devinfo; + + if (iris_domain_is_l3_coherent(devinfo, access)) + batch->l3_coherent_seqnos[access] = batch->next_seqno - 1; + else + batch->coherent_seqnos[access][access] = batch->next_seqno - 1; } /** @@ -363,8 +375,38 @@ static inline void iris_batch_mark_invalidate_sync(struct iris_batch *batch, enum iris_domain access) { - for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++) - batch->coherent_seqnos[access][i] = batch->coherent_seqnos[i][i]; + const struct intel_device_info *devinfo = &batch->screen->devinfo; + + for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++) { + if (i == access) + continue; + + if (iris_domain_is_l3_coherent(devinfo, access)) { + if (iris_domain_is_read_only(access)) { + /* Invalidating an L3-coherent read-only domain "access" also + * triggers an invalidation of any matching L3 cachelines. + * + * If domain 'i' is L3-coherent, it sees the latest data in L3, + * otherwise it sees the latest globally-observable data. 
+ */ + batch->coherent_seqnos[access][i] = + iris_domain_is_l3_coherent(devinfo, i) ? + batch->l3_coherent_seqnos[i] : batch->coherent_seqnos[i][i]; + } else { + /* Invalidating L3-coherent write domains does not trigger + * an invalidation of any matching L3 cachelines, however. + * + * It sees the latest data from domain i visible to L3 clients. + */ + batch->coherent_seqnos[access][i] = batch->l3_coherent_seqnos[i]; + } + } else { + /* "access" isn't L3-coherent, so invalidating it means it sees the + * most recent globally-observable data from domain i. + */ + batch->coherent_seqnos[access][i] = batch->coherent_seqnos[i][i]; + } + } } /** @@ -375,9 +417,11 @@ iris_batch_mark_invalidate_sync(struct iris_batch *batch, static inline void iris_batch_mark_reset_sync(struct iris_batch *batch) { - for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++) + for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++) { + batch->l3_coherent_seqnos[i] = batch->next_seqno - 1; for (unsigned j = 0; j < NUM_IRIS_DOMAINS; j++) batch->coherent_seqnos[i][j] = batch->next_seqno - 1; + } } const char * diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h index 91f128d121b..7b73c7be06b 100644 --- a/src/gallium/drivers/iris/iris_context.h +++ b/src/gallium/drivers/iris/iris_context.h @@ -358,6 +358,10 @@ enum pipe_control_flags PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \ PIPE_CONTROL_INSTRUCTION_INVALIDATE) +#define PIPE_CONTROL_L3_RO_INVALIDATE_BITS \ + (PIPE_CONTROL_L3_READ_ONLY_CACHE_INVALIDATE | \ + PIPE_CONTROL_CONST_CACHE_INVALIDATE) + enum iris_predicate_state { /* The first two states are used if we can determine whether to draw * without having to look at the values in the query object buffer. 
This diff --git a/src/gallium/drivers/iris/iris_pipe_control.c b/src/gallium/drivers/iris/iris_pipe_control.c index e9ed766e26e..bf6cf5909b1 100644 --- a/src/gallium/drivers/iris/iris_pipe_control.c +++ b/src/gallium/drivers/iris/iris_pipe_control.c @@ -184,8 +184,11 @@ iris_emit_buffer_barrier_for(struct iris_batch *batch, struct iris_bo *bo, enum iris_domain access) { + const struct intel_device_info *devinfo = &batch->screen->devinfo; const struct brw_compiler *compiler = batch->screen->compiler; + const bool access_via_l3 = iris_domain_is_l3_coherent(devinfo, access); + const uint32_t all_flush_bits = (PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_STALL_AT_SCOREBOARD | PIPE_CONTROL_FLUSH_ENABLE); @@ -211,6 +214,11 @@ iris_emit_buffer_barrier_for(struct iris_batch *batch, PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE : PIPE_CONTROL_DATA_CACHE_FLUSH), }; + const uint32_t l3_flush_bits[NUM_IRIS_DOMAINS] = { + [IRIS_DOMAIN_RENDER_WRITE] = PIPE_CONTROL_TILE_CACHE_FLUSH, + [IRIS_DOMAIN_DEPTH_WRITE] = PIPE_CONTROL_TILE_CACHE_FLUSH, + [IRIS_DOMAIN_DATA_WRITE] = PIPE_CONTROL_DATA_CACHE_FLUSH, + }; uint32_t bits = 0; /* Iterate over all read/write domains first in order to handle RaW @@ -219,6 +227,8 @@ iris_emit_buffer_barrier_for(struct iris_batch *batch, */ for (unsigned i = 0; i < IRIS_DOMAIN_OTHER_WRITE; i++) { assert(!iris_domain_is_read_only(i)); + assert(iris_domain_is_l3_coherent(devinfo, i)); + if (i != access) { const uint64_t seqno = READ_ONCE(bo->last_seqnos[i]); @@ -230,8 +240,19 @@ iris_emit_buffer_barrier_for(struct iris_batch *batch, if (seqno > batch->coherent_seqnos[access][i]) { bits |= invalidate_bits[access]; - if (seqno > batch->coherent_seqnos[i][i]) - bits |= flush_bits[i]; + if (access_via_l3) { + /* Both domains share L3. If the most recent read/write access + * in domain `i` isn't visible to L3, then flush it to L3. 
+ */ + if (seqno > batch->l3_coherent_seqnos[i]) + bits |= flush_bits[i]; + } else { + /* Domain `i` is L3 coherent but the specified domain is not. + * Flush both this cache and L3 out to memory. + */ + if (seqno > batch->coherent_seqnos[i][i]) + bits |= flush_bits[i] | l3_flush_bits[i]; + } } } } @@ -246,10 +267,14 @@ iris_emit_buffer_barrier_for(struct iris_batch *batch, assert(iris_domain_is_read_only(i)); const uint64_t seqno = READ_ONCE(bo->last_seqnos[i]); + const uint64_t last_visible_seqno = + iris_domain_is_l3_coherent(devinfo, i) ? + batch->l3_coherent_seqnos[i] : batch->coherent_seqnos[i][i]; + /* Flush if the most recent access from this domain occurred * after its most recent flush. */ - if (seqno > batch->coherent_seqnos[i][i]) + if (seqno > last_visible_seqno) bits |= flush_bits[i]; } } @@ -262,6 +287,8 @@ iris_emit_buffer_barrier_for(struct iris_batch *batch, const unsigned i = IRIS_DOMAIN_OTHER_WRITE; const uint64_t seqno = READ_ONCE(bo->last_seqnos[i]); + assert(!iris_domain_is_l3_coherent(devinfo, i)); + /* Invalidate unless the most recent read/write access from this * domain is already guaranteed to be visible to the specified * domain. Flush if the most recent access from this domain @@ -270,6 +297,14 @@ iris_emit_buffer_barrier_for(struct iris_batch *batch, if (seqno > batch->coherent_seqnos[access][i]) { bits |= invalidate_bits[access]; + /* There is a non-L3-coherent write that isn't visible to the + * specified domain. If the access is via L3, then it might see + * stale L3 data that was loaded before that write. In this case, + * we try to invalidate all read-only sections of the L3 cache. 
+ */ + if (access_via_l3 && seqno > batch->l3_coherent_seqnos[i]) + bits |= PIPE_CONTROL_L3_RO_INVALIDATE_BITS; + if (seqno > batch->coherent_seqnos[i][i]) bits |= flush_bits[i]; } diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 45c60aa8166..58cf5badb72 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -7592,6 +7592,8 @@ iris_rebind_buffer(struct iris_context *ice, static void batch_mark_sync_for_pipe_control(struct iris_batch *batch, uint32_t flags) { + const struct intel_device_info *devinfo = &batch->screen->devinfo; + iris_batch_sync_boundary(batch); if ((flags & PIPE_CONTROL_CS_STALL)) { @@ -7601,8 +7603,24 @@ batch_mark_sync_for_pipe_control(struct iris_batch *batch, uint32_t flags) if ((flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH)) iris_batch_mark_flush_sync(batch, IRIS_DOMAIN_DEPTH_WRITE); - if ((flags & PIPE_CONTROL_DATA_CACHE_FLUSH)) + if ((flags & PIPE_CONTROL_TILE_CACHE_FLUSH)) { + /* A tile cache flush makes any C/Z data in L3 visible to memory. */ + const unsigned c = IRIS_DOMAIN_RENDER_WRITE; + const unsigned z = IRIS_DOMAIN_DEPTH_WRITE; + batch->coherent_seqnos[c][c] = batch->l3_coherent_seqnos[c]; + batch->coherent_seqnos[z][z] = batch->l3_coherent_seqnos[z]; + } + + if (flags & (PIPE_CONTROL_FLUSH_HDC | PIPE_CONTROL_DATA_CACHE_FLUSH)) { + /* HDC and DC flushes both flush the data cache out to L3. */ iris_batch_mark_flush_sync(batch, IRIS_DOMAIN_DATA_WRITE); + } + + if ((flags & PIPE_CONTROL_DATA_CACHE_FLUSH)) { + /* A DC flush also flushes L3 data cache lines out to memory. 
*/ + const unsigned i = IRIS_DOMAIN_DATA_WRITE; + batch->coherent_seqnos[i][i] = batch->l3_coherent_seqnos[i]; + } if ((flags & PIPE_CONTROL_FLUSH_ENABLE)) iris_batch_mark_flush_sync(batch, IRIS_DOMAIN_OTHER_WRITE); @@ -7652,6 +7670,16 @@ batch_mark_sync_for_pipe_control(struct iris_batch *batch, uint32_t flags) iris_batch_mark_invalidate_sync(batch, IRIS_DOMAIN_PULL_CONSTANT_READ); /* IRIS_DOMAIN_OTHER_READ no longer uses any caches. */ + + if ((flags & PIPE_CONTROL_L3_RO_INVALIDATE_BITS) == PIPE_CONTROL_L3_RO_INVALIDATE_BITS) { + /* If we just invalidated the read-only lines of L3, then writes from non-L3-coherent + * domains will now be visible to those L3 clients. + */ + for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++) { + if (!iris_domain_is_l3_coherent(devinfo, i)) + batch->l3_coherent_seqnos[i] = batch->coherent_seqnos[i][i]; + } + } } static unsigned