|
|
|
|
@@ -521,11 +521,9 @@ static struct pipe_query *si_query_sw_create(unsigned query_type)
|
|
|
|
|
return (struct pipe_query *)query;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void si_query_hw_destroy(struct si_screen *sscreen,
|
|
|
|
|
struct si_query *rquery)
|
|
|
|
|
void si_query_buffer_destroy(struct si_screen *sscreen, struct si_query_buffer *buffer)
|
|
|
|
|
{
|
|
|
|
|
struct si_query_hw *query = (struct si_query_hw *)rquery;
|
|
|
|
|
struct si_query_buffer *prev = query->buffer.previous;
|
|
|
|
|
struct si_query_buffer *prev = buffer->previous;
|
|
|
|
|
|
|
|
|
|
/* Release all query buffers. */
|
|
|
|
|
while (prev) {
|
|
|
|
|
@@ -535,58 +533,103 @@ void si_query_hw_destroy(struct si_screen *sscreen,
|
|
|
|
|
FREE(qbuf);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
r600_resource_reference(&query->buffer.buf, NULL);
|
|
|
|
|
r600_resource_reference(&query->workaround_buf, NULL);
|
|
|
|
|
FREE(rquery);
|
|
|
|
|
r600_resource_reference(&buffer->buf, NULL);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static struct r600_resource *si_new_query_buffer(struct si_screen *sscreen,
|
|
|
|
|
struct si_query_hw *query)
|
|
|
|
|
void si_query_buffer_reset(struct si_context *sctx, struct si_query_buffer *buffer)
|
|
|
|
|
{
|
|
|
|
|
unsigned buf_size = MAX2(query->result_size,
|
|
|
|
|
sscreen->info.min_alloc_size);
|
|
|
|
|
/* Discard all query buffers except for the oldest. */
|
|
|
|
|
while (buffer->previous) {
|
|
|
|
|
struct si_query_buffer *qbuf = buffer->previous;
|
|
|
|
|
buffer->previous = qbuf->previous;
|
|
|
|
|
|
|
|
|
|
r600_resource_reference(&buffer->buf, NULL);
|
|
|
|
|
buffer->buf = qbuf->buf; /* move ownership */
|
|
|
|
|
FREE(qbuf);
|
|
|
|
|
}
|
|
|
|
|
buffer->results_end = 0;
|
|
|
|
|
|
|
|
|
|
/* Discard even the oldest buffer if it can't be mapped without a stall. */
|
|
|
|
|
if (buffer->buf &&
|
|
|
|
|
(si_rings_is_buffer_referenced(sctx, buffer->buf->buf, RADEON_USAGE_READWRITE) ||
|
|
|
|
|
!sctx->ws->buffer_wait(buffer->buf->buf, 0, RADEON_USAGE_READWRITE))) {
|
|
|
|
|
r600_resource_reference(&buffer->buf, NULL);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool si_query_buffer_alloc(struct si_context *sctx, struct si_query_buffer *buffer,
|
|
|
|
|
bool (*prepare_buffer)(struct si_context *, struct si_query_buffer*),
|
|
|
|
|
unsigned size)
|
|
|
|
|
{
|
|
|
|
|
if (buffer->buf && buffer->results_end + size >= buffer->buf->b.b.width0)
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
if (buffer->buf) {
|
|
|
|
|
struct si_query_buffer *qbuf = MALLOC_STRUCT(si_query_buffer);
|
|
|
|
|
memcpy(qbuf, buffer, sizeof(*qbuf));
|
|
|
|
|
buffer->previous = qbuf;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
buffer->results_end = 0;
|
|
|
|
|
|
|
|
|
|
/* Queries are normally read by the CPU after
|
|
|
|
|
* being written by the gpu, hence staging is probably a good
|
|
|
|
|
* usage pattern.
|
|
|
|
|
*/
|
|
|
|
|
struct r600_resource *buf = r600_resource(
|
|
|
|
|
pipe_buffer_create(&sscreen->b, 0,
|
|
|
|
|
PIPE_USAGE_STAGING, buf_size));
|
|
|
|
|
if (!buf)
|
|
|
|
|
return NULL;
|
|
|
|
|
struct si_screen *screen = sctx->screen;
|
|
|
|
|
unsigned buf_size = MAX2(size, screen->info.min_alloc_size);
|
|
|
|
|
buffer->buf = r600_resource(
|
|
|
|
|
pipe_buffer_create(&screen->b, 0, PIPE_USAGE_STAGING, buf_size));
|
|
|
|
|
if (unlikely(!buffer->buf))
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
if (!query->ops->prepare_buffer(sscreen, query, buf)) {
|
|
|
|
|
r600_resource_reference(&buf, NULL);
|
|
|
|
|
return NULL;
|
|
|
|
|
if (prepare_buffer) {
|
|
|
|
|
if (unlikely(!prepare_buffer(sctx, buffer))) {
|
|
|
|
|
r600_resource_reference(&buffer->buf, NULL);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return buf;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool si_query_hw_prepare_buffer(struct si_screen *sscreen,
|
|
|
|
|
struct si_query_hw *query,
|
|
|
|
|
struct r600_resource *buffer)
|
|
|
|
|
|
|
|
|
|
void si_query_hw_destroy(struct si_screen *sscreen,
|
|
|
|
|
struct si_query *rquery)
|
|
|
|
|
{
|
|
|
|
|
/* Callers ensure that the buffer is currently unused by the GPU. */
|
|
|
|
|
uint32_t *results = sscreen->ws->buffer_map(buffer->buf, NULL,
|
|
|
|
|
struct si_query_hw *query = (struct si_query_hw *)rquery;
|
|
|
|
|
|
|
|
|
|
si_query_buffer_destroy(sscreen, &query->buffer);
|
|
|
|
|
r600_resource_reference(&query->workaround_buf, NULL);
|
|
|
|
|
FREE(rquery);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool si_query_hw_prepare_buffer(struct si_context *sctx,
|
|
|
|
|
struct si_query_buffer *qbuf)
|
|
|
|
|
{
|
|
|
|
|
static const struct si_query_hw si_query_hw_s;
|
|
|
|
|
struct si_query_hw *query = container_of(qbuf, &si_query_hw_s, buffer);
|
|
|
|
|
struct si_screen *screen = sctx->screen;
|
|
|
|
|
|
|
|
|
|
/* The caller ensures that the buffer is currently unused by the GPU. */
|
|
|
|
|
uint32_t *results = screen->ws->buffer_map(qbuf->buf->buf, NULL,
|
|
|
|
|
PIPE_TRANSFER_WRITE |
|
|
|
|
|
PIPE_TRANSFER_UNSYNCHRONIZED);
|
|
|
|
|
if (!results)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
memset(results, 0, buffer->b.b.width0);
|
|
|
|
|
memset(results, 0, qbuf->buf->b.b.width0);
|
|
|
|
|
|
|
|
|
|
if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER ||
|
|
|
|
|
query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE ||
|
|
|
|
|
query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
|
|
|
|
|
unsigned max_rbs = sscreen->info.num_render_backends;
|
|
|
|
|
unsigned enabled_rb_mask = sscreen->info.enabled_rb_mask;
|
|
|
|
|
unsigned max_rbs = screen->info.num_render_backends;
|
|
|
|
|
unsigned enabled_rb_mask = screen->info.enabled_rb_mask;
|
|
|
|
|
unsigned num_results;
|
|
|
|
|
unsigned i, j;
|
|
|
|
|
|
|
|
|
|
/* Set top bits for unused backends. */
|
|
|
|
|
num_results = buffer->b.b.width0 / query->result_size;
|
|
|
|
|
num_results = qbuf->buf->b.b.width0 / query->result_size;
|
|
|
|
|
for (j = 0; j < num_results; j++) {
|
|
|
|
|
for (i = 0; i < max_rbs; i++) {
|
|
|
|
|
if (!(enabled_rb_mask & (1<<i))) {
|
|
|
|
|
@@ -631,16 +674,6 @@ static struct si_query_hw_ops query_hw_default_hw_ops = {
|
|
|
|
|
.add_result = si_query_hw_add_result,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
bool si_query_hw_init(struct si_screen *sscreen,
|
|
|
|
|
struct si_query_hw *query)
|
|
|
|
|
{
|
|
|
|
|
query->buffer.buf = si_new_query_buffer(sscreen, query);
|
|
|
|
|
if (!query->buffer.buf)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static struct pipe_query *si_query_hw_create(struct si_screen *sscreen,
|
|
|
|
|
unsigned query_type,
|
|
|
|
|
unsigned index)
|
|
|
|
|
@@ -700,11 +733,6 @@ static struct pipe_query *si_query_hw_create(struct si_screen *sscreen,
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!si_query_hw_init(sscreen, query)) {
|
|
|
|
|
FREE(query);
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return (struct pipe_query *)query;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@@ -809,8 +837,9 @@ static void si_query_hw_emit_start(struct si_context *sctx,
|
|
|
|
|
{
|
|
|
|
|
uint64_t va;
|
|
|
|
|
|
|
|
|
|
if (!query->buffer.buf)
|
|
|
|
|
return; // previous buffer allocation failure
|
|
|
|
|
if (!si_query_buffer_alloc(sctx, &query->buffer, query->ops->prepare_buffer,
|
|
|
|
|
query->result_size))
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
si_update_occlusion_query_state(sctx, query->b.type, 1);
|
|
|
|
|
si_update_prims_generated_query_state(sctx, query->b.type, 1);
|
|
|
|
|
@@ -818,20 +847,7 @@ static void si_query_hw_emit_start(struct si_context *sctx,
|
|
|
|
|
if (query->b.type != SI_QUERY_TIME_ELAPSED_SDMA)
|
|
|
|
|
si_need_gfx_cs_space(sctx);
|
|
|
|
|
|
|
|
|
|
/* Get a new query buffer if needed. */
|
|
|
|
|
if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
|
|
|
|
|
struct si_query_buffer *qbuf = MALLOC_STRUCT(si_query_buffer);
|
|
|
|
|
*qbuf = query->buffer;
|
|
|
|
|
query->buffer.results_end = 0;
|
|
|
|
|
query->buffer.previous = qbuf;
|
|
|
|
|
query->buffer.buf = si_new_query_buffer(sctx->screen, query);
|
|
|
|
|
if (!query->buffer.buf)
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* emit begin query */
|
|
|
|
|
va = query->buffer.buf->gpu_address + query->buffer.results_end;
|
|
|
|
|
|
|
|
|
|
query->ops->emit_start(sctx, query, query->buffer.buf, va);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@@ -912,13 +928,17 @@ static void si_query_hw_emit_stop(struct si_context *sctx,
|
|
|
|
|
{
|
|
|
|
|
uint64_t va;
|
|
|
|
|
|
|
|
|
|
/* The queries which need begin already called this in begin_query. */
|
|
|
|
|
if (query->flags & SI_QUERY_HW_FLAG_NO_START) {
|
|
|
|
|
si_need_gfx_cs_space(sctx);
|
|
|
|
|
if (!si_query_buffer_alloc(sctx, &query->buffer, query->ops->prepare_buffer,
|
|
|
|
|
query->result_size))
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!query->buffer.buf)
|
|
|
|
|
return; // previous buffer allocation failure
|
|
|
|
|
|
|
|
|
|
/* The queries which need begin already called this in begin_query. */
|
|
|
|
|
if (query->flags & SI_QUERY_HW_FLAG_NO_START)
|
|
|
|
|
si_need_gfx_cs_space(sctx);
|
|
|
|
|
|
|
|
|
|
/* emit end query */
|
|
|
|
|
va = query->buffer.buf->gpu_address + query->buffer.results_end;
|
|
|
|
|
|
|
|
|
|
@@ -1061,33 +1081,6 @@ static boolean si_begin_query(struct pipe_context *ctx,
|
|
|
|
|
return rquery->ops->begin(sctx, rquery);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void si_query_hw_reset_buffers(struct si_context *sctx,
|
|
|
|
|
struct si_query_hw *query)
|
|
|
|
|
{
|
|
|
|
|
struct si_query_buffer *prev = query->buffer.previous;
|
|
|
|
|
|
|
|
|
|
/* Discard the old query buffers. */
|
|
|
|
|
while (prev) {
|
|
|
|
|
struct si_query_buffer *qbuf = prev;
|
|
|
|
|
prev = prev->previous;
|
|
|
|
|
r600_resource_reference(&qbuf->buf, NULL);
|
|
|
|
|
FREE(qbuf);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
query->buffer.results_end = 0;
|
|
|
|
|
query->buffer.previous = NULL;
|
|
|
|
|
|
|
|
|
|
/* Obtain a new buffer if the current one can't be mapped without a stall. */
|
|
|
|
|
if (si_rings_is_buffer_referenced(sctx, query->buffer.buf->buf, RADEON_USAGE_READWRITE) ||
|
|
|
|
|
!sctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
|
|
|
|
|
r600_resource_reference(&query->buffer.buf, NULL);
|
|
|
|
|
query->buffer.buf = si_new_query_buffer(sctx->screen, query);
|
|
|
|
|
} else {
|
|
|
|
|
if (!query->ops->prepare_buffer(sctx->screen, query, query->buffer.buf))
|
|
|
|
|
r600_resource_reference(&query->buffer.buf, NULL);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool si_query_hw_begin(struct si_context *sctx,
|
|
|
|
|
struct si_query *rquery)
|
|
|
|
|
{
|
|
|
|
|
@@ -1099,7 +1092,7 @@ bool si_query_hw_begin(struct si_context *sctx,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!(query->flags & SI_QUERY_HW_FLAG_BEGIN_RESUMES))
|
|
|
|
|
si_query_hw_reset_buffers(sctx, query);
|
|
|
|
|
si_query_buffer_reset(sctx, &query->buffer);
|
|
|
|
|
|
|
|
|
|
r600_resource_reference(&query->workaround_buf, NULL);
|
|
|
|
|
|
|
|
|
|
@@ -1126,7 +1119,7 @@ bool si_query_hw_end(struct si_context *sctx,
|
|
|
|
|
struct si_query_hw *query = (struct si_query_hw *)rquery;
|
|
|
|
|
|
|
|
|
|
if (query->flags & SI_QUERY_HW_FLAG_NO_START)
|
|
|
|
|
si_query_hw_reset_buffers(sctx, query);
|
|
|
|
|
si_query_buffer_reset(sctx, &query->buffer);
|
|
|
|
|
|
|
|
|
|
si_query_hw_emit_stop(sctx, query);
|
|
|
|
|
|
|
|
|
|
|