st/mesa: eliminate all atomic ops when setting vertex buffers
This implements the same optimization as u_upload_mgr.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8298>
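The trick, described in detail by the new comment in st_cb_bufferobjects.h below, is that the context owning a buffer "pre-pays" a large batch of references with a single atomic add, hands them out to the driver with plain non-atomic decrements, and returns whatever is left of the batch with a single atomic subtract when the buffer is released. A minimal stand-alone sketch of that pattern, using illustrative names only (not Mesa API):

#include <assert.h>
#include <stdatomic.h>

struct resource {
   atomic_int refcount;          /* shared, atomically updated counter */
};

struct wrapper {
   struct resource *res;
   int private_refcount;         /* batch owned by a single context/thread */
};

/* Hand out one reference; no atomic op on the common path. */
static struct resource *
get_reference(struct wrapper *w)
{
   if (w->private_refcount <= 0) {
      assert(w->private_refcount == 0);
      /* Pre-pay a batch of future references with one atomic add. */
      w->private_refcount = 100000000;
      atomic_fetch_add(&w->res->refcount, w->private_refcount);
   }
   w->private_refcount--;        /* non-atomic decrement of the batch */
   return w->res;
}

/* Return the unused part of the batch before dropping the wrapper. */
static void
release_wrapper(struct wrapper *w)
{
   if (w->private_refcount) {
      atomic_fetch_sub(&w->res->refcount, w->private_refcount);
      w->private_refcount = 0;
   }
}

Only the owning context may touch private_refcount; any other context falls back to a plain atomic increment, which is what st_get_buffer_reference below does on its slow path.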
@@ -160,9 +160,8 @@ st_setup_arrays(struct st_context *st,
 
       /* Set the vertex buffer. */
       if (binding->BufferObj) {
-         struct st_buffer_object *stobj = st_buffer_object(binding->BufferObj);
-
-         vbuffer[bufidx].buffer.resource = stobj ? stobj->buffer : NULL;
+         vbuffer[bufidx].buffer.resource =
+            st_get_buffer_reference(ctx, binding->BufferObj);
          vbuffer[bufidx].is_user_buffer = false;
          vbuffer[bufidx].buffer_offset = binding->Offset +
                                          attrib->RelativeOffset;
@@ -190,9 +189,8 @@ st_setup_arrays(struct st_context *st,
 
       if (binding->BufferObj) {
         /* Set the binding */
-         struct st_buffer_object *stobj = st_buffer_object(binding->BufferObj);
-
-         vbuffer[bufidx].buffer.resource = stobj ? stobj->buffer : NULL;
+         vbuffer[bufidx].buffer.resource =
+            st_get_buffer_reference(ctx, binding->BufferObj);
          vbuffer[bufidx].is_user_buffer = false;
          vbuffer[bufidx].buffer_offset = _mesa_draw_binding_offset(binding);
       } else {
@@ -229,7 +227,7 @@ st_setup_arrays(struct st_context *st,
  * Return the index of the vertex buffer where current attribs have been
  * uploaded.
  */
-static int ALWAYS_INLINE
+static void ALWAYS_INLINE
 st_setup_current(struct st_context *st,
                  const struct st_vertex_program *vp,
                  const struct st_common_variant *vp_variant,
@@ -286,9 +284,7 @@ st_setup_current(struct st_context *st,
                      &vbuffer[bufidx].buffer.resource);
       /* Always unmap. The uploader might use explicit flushes. */
       u_upload_unmap(uploader);
-      return bufidx;
    }
-   return -1;
 }
 
 void
@@ -341,8 +337,7 @@ st_update_array(struct st_context *st)
 
    /* _NEW_CURRENT_ATTRIB */
    /* Setup zero-stride attribs. */
-   int current_attrib_buffer =
-      st_setup_current(st, vp, vp_variant, &velements, vbuffer, &num_vbuffers);
+   st_setup_current(st, vp, vp_variant, &velements, vbuffer, &num_vbuffers);
 
    velements.count = vp->num_inputs + vp_variant->key.passthrough_edgeflags;
 
@@ -354,12 +349,8 @@ st_update_array(struct st_context *st)
    cso_set_vertex_buffers_and_elements(cso, &velements,
                                        num_vbuffers,
                                        unbind_trailing_vbuffers,
-                                       false,
+                                       true,
                                        uses_user_vertex_buffers,
                                        vbuffer);
    st->last_num_vbuffers = num_vbuffers;
-
-   /* Unreference uploaded current attrib buffer. */
-   if (current_attrib_buffer >= 0)
-      pipe_resource_reference(&vbuffer[current_attrib_buffer].buffer.resource, NULL);
 }
 
@@ -70,6 +70,28 @@ st_bufferobj_alloc(struct gl_context *ctx, GLuint name)
 }
 
 
+static void
+release_buffer(struct gl_buffer_object *obj)
+{
+   struct st_buffer_object *st_obj = st_buffer_object(obj);
+
+   if (!st_obj->buffer)
+      return;
+
+   /* Subtract the remaining private references before unreferencing
+    * the buffer. See the header file for explanation.
+    */
+   if (st_obj->private_refcount) {
+      assert(st_obj->private_refcount > 0);
+      p_atomic_add(&st_obj->buffer->reference.count,
+                   -st_obj->private_refcount);
+      st_obj->private_refcount = 0;
+   }
+   st_obj->ctx = NULL;
+
+   pipe_resource_reference(&st_obj->buffer, NULL);
+}
+
+
 /**
  * Deallocate/free a vertex/pixel buffer object.
@@ -78,14 +100,9 @@ st_bufferobj_alloc(struct gl_context *ctx, GLuint name)
 static void
 st_bufferobj_free(struct gl_context *ctx, struct gl_buffer_object *obj)
 {
-   struct st_buffer_object *st_obj = st_buffer_object(obj);
-
    assert(obj->RefCount == 0);
    _mesa_buffer_unmap_all_mappings(ctx, obj);
 
-   if (st_obj->buffer)
-      pipe_resource_reference(&st_obj->buffer, NULL);
-
+   release_buffer(obj);
    _mesa_delete_buffer_object(ctx, obj);
 }
 
@@ -330,7 +347,7 @@ bufferobj_data(struct gl_context *ctx,
    st_obj->Base.Usage = usage;
    st_obj->Base.StorageFlags = storageFlags;
 
-   pipe_resource_reference( &st_obj->buffer, NULL );
+   release_buffer(obj);
 
    const unsigned bindings = buffer_target_to_bind_flags(target);
 
@@ -375,6 +392,8 @@ bufferobj_data(struct gl_context *ctx,
          st_obj->Base.Size = 0;
          return GL_FALSE;
       }
+
+      st_obj->ctx = ctx;
    }
 
    /* The current buffer may be bound, so we have to revalidate all atoms that
@@ -43,6 +43,25 @@ struct st_buffer_object
 {
    struct gl_buffer_object Base;
    struct pipe_resource *buffer;   /* GPU storage */
+
+   struct gl_context *ctx;  /* the context that owns private_refcount */
+
+   /* This mechanism allows passing buffer references to the driver without
+    * using atomics to increase the reference count.
+    *
+    * This private refcount can be decremented without atomics but only one
+    * context (ctx above) can use this counter to be thread-safe.
+    *
+    * This number is atomically added to buffer->reference.count at
+    * initialization. If it's never used, the same number is atomically
+    * subtracted from buffer->reference.count before destruction. If this
+    * number is decremented, we can pass that reference to the driver without
+    * touching reference.count. At buffer destruction we only subtract
+    * the number of references we did not return. This can possibly turn
+    * a million atomic increments into 1 add and 1 subtract atomic op.
+    */
+   int private_refcount;
+
    struct pipe_transfer *transfer[MAP_COUNT];
 };
 
@@ -63,5 +82,37 @@ extern void
 st_init_bufferobject_functions(struct pipe_screen *screen,
                                struct dd_function_table *functions);
 
+static inline struct pipe_resource *
+st_get_buffer_reference(struct gl_context *ctx, struct gl_buffer_object *obj)
+{
+   if (unlikely(!obj))
+      return NULL;
+
+   struct st_buffer_object *stobj = st_buffer_object(obj);
+   struct pipe_resource *buffer = stobj->buffer;
+
+   if (unlikely(!buffer))
+      return NULL;
+
+   /* Only one context is using the fast path. All other contexts must use
+    * the slow path.
+    */
+   if (unlikely(stobj->ctx != ctx)) {
+      p_atomic_inc(&buffer->reference.count);
+      return buffer;
+   }
+
+   if (unlikely(stobj->private_refcount <= 0)) {
+      assert(stobj->private_refcount == 0);
+
+      /* This is the number of atomic increments we will skip. */
+      stobj->private_refcount = 100000000;
+      p_atomic_add(&buffer->reference.count, stobj->private_refcount);
+   }
+
+   /* Return a buffer reference while decrementing the private refcount. */
+   stobj->private_refcount--;
+   return buffer;
+}
+
 #endif