r600g: suballocate memory for the STRMOUT_BUFFER_FILLED_SIZE register

Instead of having a 4-byte buffer for each streamout target, we suballocate
each dword from a 4K buffer.

This further reduces the overall number of relocations.

Tested-by: Aaron Watry <awatry@gmail.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Marek Olšák
2012-12-09 17:56:26 +01:00
parent cc2d908572
commit 8df3855eed
5 changed files with 28 additions and 16 deletions
+3 -1
View File
@@ -175,7 +175,9 @@ struct r600_so_target {
struct pipe_stream_output_target b;
/* The buffer where BUFFER_FILLED_SIZE is stored. */
struct r600_resource *filled_size;
struct r600_resource *buf_filled_size;
unsigned buf_filled_size_offset;
unsigned stride_in_dw;
unsigned so_index;
};
+4 -4
View File
@@ -1005,7 +1005,7 @@ void r600_context_streamout_begin(struct r600_context *ctx)
if (ctx->streamout_append_bitmask & (1 << i)) {
va = r600_resource_va(&ctx->screen->screen,
(void*)t[i]->filled_size);
(void*)t[i]->buf_filled_size) + t[i]->buf_filled_size_offset;
/* Append. */
cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
@@ -1017,7 +1017,7 @@ void r600_context_streamout_begin(struct r600_context *ctx)
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
cs->buf[cs->cdw++] =
r600_context_bo_reloc(ctx, t[i]->filled_size,
r600_context_bo_reloc(ctx, t[i]->buf_filled_size,
RADEON_USAGE_READ);
} else {
/* Start from the beginning. */
@@ -1054,7 +1054,7 @@ void r600_context_streamout_end(struct r600_context *ctx)
for (i = 0; i < ctx->num_so_targets; i++) {
if (t[i]) {
va = r600_resource_va(&ctx->screen->screen,
(void*)t[i]->filled_size);
(void*)t[i]->buf_filled_size) + t[i]->buf_filled_size_offset;
cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
@@ -1066,7 +1066,7 @@ void r600_context_streamout_end(struct r600_context *ctx)
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
cs->buf[cs->cdw++] =
r600_context_bo_reloc(ctx, t[i]->filled_size,
r600_context_bo_reloc(ctx, t[i]->buf_filled_size,
RADEON_USAGE_WRITE);
}
+8
View File
@@ -185,6 +185,9 @@ static void r600_destroy_context(struct pipe_context *context)
if (rctx->uploader) {
u_upload_destroy(rctx->uploader);
}
if (rctx->allocator_so_filled_size) {
u_suballocator_destroy(rctx->allocator_so_filled_size);
}
util_slab_destroy(&rctx->pool_transfers);
r600_release_command_buffer(&rctx->start_cs_cmd);
@@ -291,6 +294,11 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
if (!rctx->uploader)
goto fail;
rctx->allocator_so_filled_size = u_suballocator_create(&rctx->context, 4096, 4,
0, PIPE_USAGE_STATIC, TRUE);
if (!rctx->allocator_so_filled_size)
goto fail;
rctx->blitter = util_blitter_create(&rctx->context);
if (rctx->blitter == NULL)
goto fail;
+2
View File
@@ -28,6 +28,7 @@
#include "util/u_blitter.h"
#include "util/u_slab.h"
#include "util/u_suballoc.h"
#include "r600.h"
#include "r600_llvm.h"
#include "r600_public.h"
@@ -389,6 +390,7 @@ struct r600_context {
struct radeon_winsys_cs *cs;
struct blitter_context *blitter;
struct u_upload_mgr *uploader;
struct u_suballocator *allocator_so_filled_size;
struct util_slab_mempool pool_transfers;
/* Hardware info. */
+11 -11
View File
@@ -956,25 +956,25 @@ r600_create_so_target(struct pipe_context *ctx,
{
struct r600_context *rctx = (struct r600_context *)ctx;
struct r600_so_target *t;
void *ptr;
t = CALLOC_STRUCT(r600_so_target);
if (!t) {
return NULL;
}
u_suballocator_alloc(rctx->allocator_so_filled_size, 4,
&t->buf_filled_size_offset,
(struct pipe_resource**)&t->buf_filled_size);
if (!t->buf_filled_size) {
FREE(t);
return NULL;
}
t->b.reference.count = 1;
t->b.context = ctx;
pipe_resource_reference(&t->b.buffer, buffer);
t->b.buffer_offset = buffer_offset;
t->b.buffer_size = buffer_size;
t->filled_size = (struct r600_resource*)
pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_STATIC, 4);
ptr = rctx->ws->buffer_map(t->filled_size->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
memset(ptr, 0, t->filled_size->buf->size);
rctx->ws->buffer_unmap(t->filled_size->cs_buf);
return &t->b;
}
@@ -983,7 +983,7 @@ static void r600_so_target_destroy(struct pipe_context *ctx,
{
struct r600_so_target *t = (struct r600_so_target*)target;
pipe_resource_reference(&t->b.buffer, NULL);
pipe_resource_reference((struct pipe_resource**)&t->filled_size, NULL);
pipe_resource_reference((struct pipe_resource**)&t->buf_filled_size, NULL);
FREE(t);
}
@@ -1308,7 +1308,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
} else {
if (info.count_from_stream_output) {
struct r600_so_target *t = (struct r600_so_target*)info.count_from_stream_output;
uint64_t va = r600_resource_va(&rctx->screen->screen, (void*)t->filled_size);
uint64_t va = r600_resource_va(&rctx->screen->screen, (void*)t->buf_filled_size) + t->buf_filled_size_offset;
r600_write_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, t->stride_in_dw);
@@ -1320,7 +1320,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
cs->buf[cs->cdw++] = 0; /* unused */
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
cs->buf[cs->cdw++] = r600_context_bo_reloc(rctx, t->filled_size, RADEON_USAGE_READ);
cs->buf[cs->cdw++] = r600_context_bo_reloc(rctx, t->buf_filled_size, RADEON_USAGE_READ);
}
cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, rctx->predicate_drawing);