r600g: suballocate memory for the STRMOUT_BUFFER_FILLED_SIZE register
Instead of having a 4-byte buffer for each streamout target, we suballocate each dword from a 4K buffer. This further reduces the overall number of relocations.

Tested-by: Aaron Watry <awatry@gmail.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
@@ -175,7 +175,9 @@ struct r600_so_target {
|
||||
struct pipe_stream_output_target b;
|
||||
|
||||
/* The buffer where BUFFER_FILLED_SIZE is stored. */
|
||||
struct r600_resource *filled_size;
|
||||
struct r600_resource *buf_filled_size;
|
||||
unsigned buf_filled_size_offset;
|
||||
|
||||
unsigned stride_in_dw;
|
||||
unsigned so_index;
|
||||
};
|
||||
|
||||
@@ -1005,7 +1005,7 @@ void r600_context_streamout_begin(struct r600_context *ctx)
|
||||
|
||||
if (ctx->streamout_append_bitmask & (1 << i)) {
|
||||
va = r600_resource_va(&ctx->screen->screen,
|
||||
(void*)t[i]->filled_size);
|
||||
(void*)t[i]->buf_filled_size) + t[i]->buf_filled_size_offset;
|
||||
/* Append. */
|
||||
cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
|
||||
cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
|
||||
@@ -1017,7 +1017,7 @@ void r600_context_streamout_begin(struct r600_context *ctx)
|
||||
|
||||
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
|
||||
cs->buf[cs->cdw++] =
|
||||
r600_context_bo_reloc(ctx, t[i]->filled_size,
|
||||
r600_context_bo_reloc(ctx, t[i]->buf_filled_size,
|
||||
RADEON_USAGE_READ);
|
||||
} else {
|
||||
/* Start from the beginning. */
|
||||
@@ -1054,7 +1054,7 @@ void r600_context_streamout_end(struct r600_context *ctx)
|
||||
for (i = 0; i < ctx->num_so_targets; i++) {
|
||||
if (t[i]) {
|
||||
va = r600_resource_va(&ctx->screen->screen,
|
||||
(void*)t[i]->filled_size);
|
||||
(void*)t[i]->buf_filled_size) + t[i]->buf_filled_size_offset;
|
||||
cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
|
||||
cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
|
||||
STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
|
||||
@@ -1066,7 +1066,7 @@ void r600_context_streamout_end(struct r600_context *ctx)
|
||||
|
||||
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
|
||||
cs->buf[cs->cdw++] =
|
||||
r600_context_bo_reloc(ctx, t[i]->filled_size,
|
||||
r600_context_bo_reloc(ctx, t[i]->buf_filled_size,
|
||||
RADEON_USAGE_WRITE);
|
||||
|
||||
}
|
||||
|
||||
@@ -185,6 +185,9 @@ static void r600_destroy_context(struct pipe_context *context)
|
||||
if (rctx->uploader) {
|
||||
u_upload_destroy(rctx->uploader);
|
||||
}
|
||||
if (rctx->allocator_so_filled_size) {
|
||||
u_suballocator_destroy(rctx->allocator_so_filled_size);
|
||||
}
|
||||
util_slab_destroy(&rctx->pool_transfers);
|
||||
|
||||
r600_release_command_buffer(&rctx->start_cs_cmd);
|
||||
@@ -291,6 +294,11 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
|
||||
if (!rctx->uploader)
|
||||
goto fail;
|
||||
|
||||
rctx->allocator_so_filled_size = u_suballocator_create(&rctx->context, 4096, 4,
|
||||
0, PIPE_USAGE_STATIC, TRUE);
|
||||
if (!rctx->allocator_so_filled_size)
|
||||
goto fail;
|
||||
|
||||
rctx->blitter = util_blitter_create(&rctx->context);
|
||||
if (rctx->blitter == NULL)
|
||||
goto fail;
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
|
||||
#include "util/u_blitter.h"
|
||||
#include "util/u_slab.h"
|
||||
#include "util/u_suballoc.h"
|
||||
#include "r600.h"
|
||||
#include "r600_llvm.h"
|
||||
#include "r600_public.h"
|
||||
@@ -389,6 +390,7 @@ struct r600_context {
|
||||
struct radeon_winsys_cs *cs;
|
||||
struct blitter_context *blitter;
|
||||
struct u_upload_mgr *uploader;
|
||||
struct u_suballocator *allocator_so_filled_size;
|
||||
struct util_slab_mempool pool_transfers;
|
||||
|
||||
/* Hardware info. */
|
||||
|
||||
@@ -956,25 +956,25 @@ r600_create_so_target(struct pipe_context *ctx,
|
||||
{
|
||||
struct r600_context *rctx = (struct r600_context *)ctx;
|
||||
struct r600_so_target *t;
|
||||
void *ptr;
|
||||
|
||||
t = CALLOC_STRUCT(r600_so_target);
|
||||
if (!t) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
u_suballocator_alloc(rctx->allocator_so_filled_size, 4,
|
||||
&t->buf_filled_size_offset,
|
||||
(struct pipe_resource**)&t->buf_filled_size);
|
||||
if (!t->buf_filled_size) {
|
||||
FREE(t);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
t->b.reference.count = 1;
|
||||
t->b.context = ctx;
|
||||
pipe_resource_reference(&t->b.buffer, buffer);
|
||||
t->b.buffer_offset = buffer_offset;
|
||||
t->b.buffer_size = buffer_size;
|
||||
|
||||
t->filled_size = (struct r600_resource*)
|
||||
pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_STATIC, 4);
|
||||
ptr = rctx->ws->buffer_map(t->filled_size->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
|
||||
memset(ptr, 0, t->filled_size->buf->size);
|
||||
rctx->ws->buffer_unmap(t->filled_size->cs_buf);
|
||||
|
||||
return &t->b;
|
||||
}
|
||||
|
||||
@@ -983,7 +983,7 @@ static void r600_so_target_destroy(struct pipe_context *ctx,
|
||||
{
|
||||
struct r600_so_target *t = (struct r600_so_target*)target;
|
||||
pipe_resource_reference(&t->b.buffer, NULL);
|
||||
pipe_resource_reference((struct pipe_resource**)&t->filled_size, NULL);
|
||||
pipe_resource_reference((struct pipe_resource**)&t->buf_filled_size, NULL);
|
||||
FREE(t);
|
||||
}
|
||||
|
||||
@@ -1308,7 +1308,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
|
||||
} else {
|
||||
if (info.count_from_stream_output) {
|
||||
struct r600_so_target *t = (struct r600_so_target*)info.count_from_stream_output;
|
||||
uint64_t va = r600_resource_va(&rctx->screen->screen, (void*)t->filled_size);
|
||||
uint64_t va = r600_resource_va(&rctx->screen->screen, (void*)t->buf_filled_size) + t->buf_filled_size_offset;
|
||||
|
||||
r600_write_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, t->stride_in_dw);
|
||||
|
||||
@@ -1320,7 +1320,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
|
||||
cs->buf[cs->cdw++] = 0; /* unused */
|
||||
|
||||
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
|
||||
cs->buf[cs->cdw++] = r600_context_bo_reloc(rctx, t->filled_size, RADEON_USAGE_READ);
|
||||
cs->buf[cs->cdw++] = r600_context_bo_reloc(rctx, t->buf_filled_size, RADEON_USAGE_READ);
|
||||
}
|
||||
|
||||
cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, rctx->predicate_drawing);
|
||||
|
||||
Reference in New Issue
Block a user