i965/gen6: Stream the VS push constants.
Improves 3DMMES taiji demo performance by 10.1% +/- 0.9% (n=15). Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
@@ -640,6 +640,9 @@ struct brw_context
|
||||
uint32_t bind_bo_offset;
|
||||
uint32_t surf_offset[BRW_VS_MAX_SURF];
|
||||
GLuint nr_surfaces;
|
||||
|
||||
uint32_t push_const_offset; /* Offset in the batchbuffer */
|
||||
int push_const_size; /* in 256-bit register increments */
|
||||
} vs;
|
||||
|
||||
struct {
|
||||
|
||||
@@ -367,6 +367,7 @@ static GLboolean brw_try_draw_prims( struct gl_context *ctx,
|
||||
int estimated_max_prim_size;
|
||||
|
||||
estimated_max_prim_size = 512; /* batchbuffer commands */
|
||||
estimated_max_prim_size += 1024; /* gen6 VS push constants */
|
||||
estimated_max_prim_size += 1024; /* gen6 WM push constants */
|
||||
estimated_max_prim_size += 512; /* misc. pad */
|
||||
|
||||
|
||||
@@ -107,6 +107,7 @@ extern const struct brw_tracked_state gen6_sf_state;
|
||||
extern const struct brw_tracked_state gen6_sf_vp;
|
||||
extern const struct brw_tracked_state gen6_urb;
|
||||
extern const struct brw_tracked_state gen6_viewport_state;
|
||||
extern const struct brw_tracked_state gen6_vs_constants;
|
||||
extern const struct brw_tracked_state gen6_vs_state;
|
||||
extern const struct brw_tracked_state gen6_wm_constants;
|
||||
extern const struct brw_tracked_state gen6_wm_state;
|
||||
|
||||
@@ -129,6 +129,7 @@ static const struct brw_tracked_state *gen6_atoms[] =
|
||||
|
||||
&brw_vs_constants, /* Before vs_surfaces and constant_buffer */
|
||||
&brw_wm_constants, /* Before wm_surfaces and constant_buffer */
|
||||
&gen6_vs_constants, /* Before vs_state */
|
||||
&gen6_wm_constants, /* Before wm_state */
|
||||
|
||||
&brw_vs_surfaces, /* must do before unit */
|
||||
|
||||
@@ -34,43 +34,36 @@
|
||||
#include "intel_batchbuffer.h"
|
||||
|
||||
static void
|
||||
upload_vs_state(struct brw_context *brw)
|
||||
gen6_prepare_vs_push_constants(struct brw_context *brw)
|
||||
{
|
||||
struct intel_context *intel = &brw->intel;
|
||||
struct gl_context *ctx = &intel->ctx;
|
||||
/* _BRW_NEW_VERTEX_PROGRAM */
|
||||
const struct brw_vertex_program *vp =
|
||||
brw_vertex_program_const(brw->vertex_program);
|
||||
unsigned int nr_params = brw->vs.prog_data->nr_params / 4;
|
||||
drm_intel_bo *constant_bo;
|
||||
int i;
|
||||
|
||||
if (brw->vertex_program->IsNVProgram)
|
||||
_mesa_load_tracked_matrices(ctx);
|
||||
|
||||
/* Updates the ParameterValues[i] pointers for all parameters of the
|
||||
* basic type of PROGRAM_STATE_VAR.
|
||||
*/
|
||||
/* XXX: Should this happen somewhere before to get our state flag set? */
|
||||
_mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
|
||||
|
||||
/* CACHE_NEW_VS_PROG | _NEW_TRANSFORM */
|
||||
if (brw->vs.prog_data->nr_params == 0 && !ctx->Transform.ClipPlanesEnabled) {
|
||||
/* Disable the push constant buffers. */
|
||||
BEGIN_BATCH(5);
|
||||
OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2));
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
brw->vs.push_const_size = 0;
|
||||
} else {
|
||||
int params_uploaded = 0, param_regs;
|
||||
int params_uploaded = 0;
|
||||
float *param;
|
||||
int i;
|
||||
|
||||
if (brw->vertex_program->IsNVProgram)
|
||||
_mesa_load_tracked_matrices(ctx);
|
||||
|
||||
/* Updates the ParameterValues[i] pointers for all parameters of the
|
||||
* basic type of PROGRAM_STATE_VAR.
|
||||
*/
|
||||
_mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
|
||||
|
||||
constant_bo = drm_intel_bo_alloc(intel->bufmgr, "VS constant_bo",
|
||||
(MAX_CLIP_PLANES + nr_params) *
|
||||
4 * sizeof(float),
|
||||
4096);
|
||||
drm_intel_gem_bo_map_gtt(constant_bo);
|
||||
param = constant_bo->virtual;
|
||||
param = brw_state_batch(brw,
|
||||
(MAX_CLIP_PLANES + nr_params) *
|
||||
4 * sizeof(float),
|
||||
32, &brw->vs.push_const_offset);
|
||||
|
||||
/* This should be loaded like any other param, but it's ad-hoc
|
||||
* until we redo the VS backend.
|
||||
@@ -100,30 +93,56 @@ upload_vs_state(struct brw_context *brw)
|
||||
if (0) {
|
||||
printf("VS constant buffer:\n");
|
||||
for (i = 0; i < params_uploaded; i++) {
|
||||
float *buf = (float *)constant_bo->virtual + i * 4;
|
||||
float *buf = param + i * 4;
|
||||
printf("%d: %f %f %f %f\n",
|
||||
i, buf[0], buf[1], buf[2], buf[3]);
|
||||
}
|
||||
}
|
||||
|
||||
drm_intel_gem_bo_unmap_gtt(constant_bo);
|
||||
brw->vs.push_const_size = (params_uploaded + 1) / 2;
|
||||
/* We can only push 32 registers of constants at a time. */
|
||||
assert(brw->vs.push_const_size <= 32);
|
||||
}
|
||||
}
|
||||
|
||||
param_regs = (params_uploaded + 1) / 2;
|
||||
assert(param_regs <= 32);
|
||||
const struct brw_tracked_state gen6_vs_constants = {
|
||||
.dirty = {
|
||||
.mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
|
||||
.brw = (BRW_NEW_BATCH |
|
||||
BRW_NEW_VERTEX_PROGRAM),
|
||||
.cache = 0,
|
||||
},
|
||||
.prepare = gen6_prepare_vs_push_constants,
|
||||
};
|
||||
|
||||
static void
|
||||
upload_vs_state(struct brw_context *brw)
|
||||
{
|
||||
struct intel_context *intel = &brw->intel;
|
||||
|
||||
if (brw->vs.push_const_size == 0) {
|
||||
/* Disable the push constant buffers. */
|
||||
BEGIN_BATCH(5);
|
||||
OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 |
|
||||
GEN6_CONSTANT_BUFFER_0_ENABLE |
|
||||
(5 - 2));
|
||||
OUT_RELOC(constant_bo,
|
||||
I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
|
||||
param_regs - 1);
|
||||
OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2));
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
} else {
|
||||
BEGIN_BATCH(5);
|
||||
OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 |
|
||||
GEN6_CONSTANT_BUFFER_0_ENABLE |
|
||||
(5 - 2));
|
||||
/* This is also the set of state flags from gen6_prepare_vs_push_constants */
|
||||
OUT_RELOC(intel->batch.bo,
|
||||
I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
|
||||
brw->vs.push_const_offset +
|
||||
brw->vs.push_const_size - 1);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
|
||||
drm_intel_bo_unreference(constant_bo);
|
||||
}
|
||||
|
||||
BEGIN_BATCH(6);
|
||||
@@ -149,7 +168,9 @@ const struct brw_tracked_state gen6_vs_state = {
|
||||
.brw = (BRW_NEW_CURBE_OFFSETS |
|
||||
BRW_NEW_NR_VS_SURFACES |
|
||||
BRW_NEW_URB_FENCE |
|
||||
BRW_NEW_CONTEXT),
|
||||
BRW_NEW_CONTEXT |
|
||||
BRW_NEW_VERTEX_PROGRAM |
|
||||
BRW_NEW_BATCH),
|
||||
.cache = CACHE_NEW_VS_PROG
|
||||
},
|
||||
.emit = upload_vs_state,
|
||||
|
||||
Reference in New Issue
Block a user