gallium/radeon: add RADEON_USAGE_SYNCHRONIZED

Synchronized access is really the behavior we want most of the time, but
having a SYNCHRONIZED flag instead of an UNSYNCHRONIZED one has the advantage
that OR'ing different flags together always results in stronger guarantees.

The parent BOs of sub-allocated buffers will be added unsynchronized.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
This commit is contained in:
Nicolai Hähnle
2016-09-07 10:57:56 +02:00
parent 84f156c0cb
commit e703f71ebd
5 changed files with 25 additions and 13 deletions
+11 -8
View File
@@ -1321,7 +1321,7 @@ validate:
tex = r300_resource(fb->cbufs[i]->texture);
assert(tex && tex->buf && "cbuf is marked, but NULL!");
r300->rws->cs_add_buffer(r300->cs, tex->buf,
RADEON_USAGE_READWRITE,
RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED,
r300_surface(fb->cbufs[i])->domain,
tex->b.b.nr_samples > 1 ?
RADEON_PRIO_COLOR_BUFFER_MSAA :
@@ -1332,7 +1332,7 @@ validate:
tex = r300_resource(fb->zsbuf->texture);
assert(tex && tex->buf && "zsbuf is marked, but NULL!");
r300->rws->cs_add_buffer(r300->cs, tex->buf,
RADEON_USAGE_READWRITE,
RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED,
r300_surface(fb->zsbuf)->domain,
tex->b.b.nr_samples > 1 ?
RADEON_PRIO_DEPTH_BUFFER_MSAA :
@@ -1343,7 +1343,7 @@ validate:
if (r300->aa_state.dirty) {
if (aa->dest) {
r300->rws->cs_add_buffer(r300->cs, aa->dest->buf,
RADEON_USAGE_WRITE,
RADEON_USAGE_WRITE | RADEON_USAGE_SYNCHRONIZED,
aa->dest->domain,
RADEON_PRIO_COLOR_BUFFER);
}
@@ -1356,19 +1356,22 @@ validate:
}
tex = r300_resource(texstate->sampler_views[i]->base.texture);
r300->rws->cs_add_buffer(r300->cs, tex->buf, RADEON_USAGE_READ,
r300->rws->cs_add_buffer(r300->cs, tex->buf,
RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED,
tex->domain, RADEON_PRIO_SAMPLER_TEXTURE);
}
}
/* ...occlusion query buffer... */
if (r300->query_current)
r300->rws->cs_add_buffer(r300->cs, r300->query_current->buf,
RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT,
RADEON_USAGE_WRITE | RADEON_USAGE_SYNCHRONIZED,
RADEON_DOMAIN_GTT,
RADEON_PRIO_QUERY);
/* ...vertex buffer for SWTCL path... */
if (r300->vbo)
r300->rws->cs_add_buffer(r300->cs, r300->vbo,
RADEON_USAGE_READ, RADEON_DOMAIN_GTT,
RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED,
RADEON_DOMAIN_GTT,
RADEON_PRIO_VERTEX_BUFFER);
/* ...vertex buffers for HWTCL path... */
if (do_validate_vertex_buffers && r300->vertex_arrays_dirty) {
@@ -1383,7 +1386,7 @@ validate:
continue;
r300->rws->cs_add_buffer(r300->cs, r300_resource(buf)->buf,
RADEON_USAGE_READ,
RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED,
r300_resource(buf)->domain,
RADEON_PRIO_SAMPLER_BUFFER);
}
@@ -1391,7 +1394,7 @@ validate:
/* ...and index buffer for HWTCL path. */
if (index_buffer)
r300->rws->cs_add_buffer(r300->cs, r300_resource(index_buffer)->buf,
RADEON_USAGE_READ,
RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED,
r300_resource(index_buffer)->domain,
RADEON_PRIO_INDEX_BUFFER);
+4 -2
View File
@@ -73,8 +73,10 @@ static inline unsigned radeon_add_to_buffer_list(struct r600_common_context *rct
enum radeon_bo_priority priority)
{
assert(usage);
return rctx->ws->cs_add_buffer(ring->cs, rbo->buf, usage,
rbo->domains, priority) * 4;
return rctx->ws->cs_add_buffer(
ring->cs, rbo->buf,
(enum radeon_bo_usage)(usage | RADEON_USAGE_SYNCHRONIZED),
rbo->domains, priority) * 4;
}
/**
+2 -1
View File
@@ -113,7 +113,8 @@ static void send_cmd(struct ruvd_decoder *dec, unsigned cmd,
{
int reloc_idx;
reloc_idx = dec->ws->cs_add_buffer(dec->cs, buf, usage, domain,
reloc_idx = dec->ws->cs_add_buffer(dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
domain,
RADEON_PRIO_UVD);
if (!dec->use_legacy) {
uint64_t addr;
+2 -1
View File
@@ -540,7 +540,8 @@ void rvce_add_buffer(struct rvce_encoder *enc, struct pb_buffer *buf,
{
int reloc_idx;
reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf, usage, domain, RADEON_PRIO_VCE);
reloc_idx = enc->ws->cs_add_buffer(enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
domain, RADEON_PRIO_VCE);
if (enc->use_vm) {
uint64_t addr;
addr = enc->ws->buffer_get_virtual_address(buf);
+6 -1
View File
@@ -57,7 +57,12 @@ enum radeon_bo_flag { /* bitfield */
enum radeon_bo_usage { /* bitfield */
RADEON_USAGE_READ = 2,
RADEON_USAGE_WRITE = 4,
RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE
RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE,
/* The winsys ensures that the CS submission will be scheduled after
* previously flushed CSs referencing this BO in a conflicting way.
*/
RADEON_USAGE_SYNCHRONIZED = 8
};
enum ring_type {