i965/gen6: Stream the VS push constants.

Improves 3DMMES taiji demo performance by 10.1% +/- 0.9% (n=15).

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Eric Anholt
2011-04-22 09:55:25 -07:00
parent 90c70123b8
commit 32cc0c9d8d
5 changed files with 66 additions and 39 deletions
+3
View File
@@ -640,6 +640,9 @@ struct brw_context
uint32_t bind_bo_offset;
uint32_t surf_offset[BRW_VS_MAX_SURF];
GLuint nr_surfaces;
uint32_t push_const_offset; /* Offset in the batchbuffer */
int push_const_size; /* in 256-bit register increments */
} vs;
struct {
+1
View File
@@ -367,6 +367,7 @@ static GLboolean brw_try_draw_prims( struct gl_context *ctx,
int estimated_max_prim_size;
estimated_max_prim_size = 512; /* batchbuffer commands */
estimated_max_prim_size += 1024; /* gen6 VS push constants */
estimated_max_prim_size += 1024; /* gen6 WM push constants */
estimated_max_prim_size += 512; /* misc. pad */
+1
View File
@@ -107,6 +107,7 @@ extern const struct brw_tracked_state gen6_sf_state;
extern const struct brw_tracked_state gen6_sf_vp;
extern const struct brw_tracked_state gen6_urb;
extern const struct brw_tracked_state gen6_viewport_state;
extern const struct brw_tracked_state gen6_vs_constants;
extern const struct brw_tracked_state gen6_vs_state;
extern const struct brw_tracked_state gen6_wm_constants;
extern const struct brw_tracked_state gen6_wm_state;
@@ -129,6 +129,7 @@ static const struct brw_tracked_state *gen6_atoms[] =
&brw_vs_constants, /* Before vs_surfaces and constant_buffer */
&brw_wm_constants, /* Before wm_surfaces and constant_buffer */
&gen6_vs_constants, /* Before vs_state */
&gen6_wm_constants, /* Before wm_state */
&brw_vs_surfaces, /* must do before unit */
+60 -39
View File
@@ -34,43 +34,36 @@
#include "intel_batchbuffer.h"
static void
upload_vs_state(struct brw_context *brw)
gen6_prepare_vs_push_constants(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &intel->ctx;
/* _BRW_NEW_VERTEX_PROGRAM */
const struct brw_vertex_program *vp =
brw_vertex_program_const(brw->vertex_program);
unsigned int nr_params = brw->vs.prog_data->nr_params / 4;
drm_intel_bo *constant_bo;
int i;
if (brw->vertex_program->IsNVProgram)
_mesa_load_tracked_matrices(ctx);
/* Updates the ParameterValues[i] pointers for all parameters of the
* basic type of PROGRAM_STATE_VAR.
*/
/* XXX: Should this happen somewhere before to get our state flag set? */
_mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
/* CACHE_NEW_VS_PROG | _NEW_TRANSFORM */
if (brw->vs.prog_data->nr_params == 0 && !ctx->Transform.ClipPlanesEnabled) {
/* Disable the push constant buffers. */
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
brw->vs.push_const_size = 0;
} else {
int params_uploaded = 0, param_regs;
int params_uploaded = 0;
float *param;
int i;
if (brw->vertex_program->IsNVProgram)
_mesa_load_tracked_matrices(ctx);
/* Updates the ParameterValues[i] pointers for all parameters of the
* basic type of PROGRAM_STATE_VAR.
*/
_mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
constant_bo = drm_intel_bo_alloc(intel->bufmgr, "VS constant_bo",
(MAX_CLIP_PLANES + nr_params) *
4 * sizeof(float),
4096);
drm_intel_gem_bo_map_gtt(constant_bo);
param = constant_bo->virtual;
param = brw_state_batch(brw,
(MAX_CLIP_PLANES + nr_params) *
4 * sizeof(float),
32, &brw->vs.push_const_offset);
/* This should be loaded like any other param, but it's ad-hoc
* until we redo the VS backend.
@@ -100,30 +93,56 @@ upload_vs_state(struct brw_context *brw)
if (0) {
printf("VS constant buffer:\n");
for (i = 0; i < params_uploaded; i++) {
float *buf = (float *)constant_bo->virtual + i * 4;
float *buf = param + i * 4;
printf("%d: %f %f %f %f\n",
i, buf[0], buf[1], buf[2], buf[3]);
}
}
drm_intel_gem_bo_unmap_gtt(constant_bo);
brw->vs.push_const_size = (params_uploaded + 1) / 2;
/* We can only push 32 registers of constants at a time. */
assert(brw->vs.push_const_size <= 32);
}
}
param_regs = (params_uploaded + 1) / 2;
assert(param_regs <= 32);
/* Tracked-state atom for the gen6 VS push constants.
 *
 * Only a .prepare hook is set here (gen6_prepare_vs_push_constants); that
 * hook fills in brw->vs.push_const_offset and brw->vs.push_const_size,
 * which upload_vs_state() reads when it emits 3DSTATE_CONSTANT_VS.
 */
const struct brw_tracked_state gen6_vs_constants = {
.dirty = {
/* The prepare hook reads ctx->Transform.ClipPlanesEnabled and the
 * vertex program's parameter list.
 */
.mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
/* The constants are placed with brw_state_batch() at an offset in the
 * batchbuffer, which is presumably why BRW_NEW_BATCH is listed: a new
 * batch would need the data written again -- TODO confirm.
 */
.brw = (BRW_NEW_BATCH |
BRW_NEW_VERTEX_PROGRAM),
.cache = 0,
},
.prepare = gen6_prepare_vs_push_constants,
};
static void
upload_vs_state(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
if (brw->vs.push_const_size == 0) {
/* Disable the push constant buffers. */
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 |
GEN6_CONSTANT_BUFFER_0_ENABLE |
(5 - 2));
OUT_RELOC(constant_bo,
I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
param_regs - 1);
OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
} else {
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 |
GEN6_CONSTANT_BUFFER_0_ENABLE |
(5 - 2));
/* This is also the set of state flags from gen6_prepare_vs_push_constants */
OUT_RELOC(intel->batch.bo,
I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
brw->vs.push_const_offset +
brw->vs.push_const_size - 1);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
drm_intel_bo_unreference(constant_bo);
}
BEGIN_BATCH(6);
@@ -149,7 +168,9 @@ const struct brw_tracked_state gen6_vs_state = {
.brw = (BRW_NEW_CURBE_OFFSETS |
BRW_NEW_NR_VS_SURFACES |
BRW_NEW_URB_FENCE |
BRW_NEW_CONTEXT),
BRW_NEW_CONTEXT |
BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_BATCH),
.cache = CACHE_NEW_VS_PROG
},
.emit = upload_vs_state,