i965: Upload separate per-stage sampler state tables.
Also upload separate sampler default/texture border color entries.

At the moment, this is completely idiotic: both tables contain exactly the same contents, so we're simply wasting batch space and CPU time. However, soon we'll only upload data for textures actually /used/ in a particular stage, which will usually make the VS table empty and very likely eliminate all redundancy. This is just a stepping stone.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Paul Berry <stereotype441@gmail.com>
This commit is contained in:
@@ -1064,11 +1064,6 @@ struct brw_context
|
||||
GLuint last_bufsz;
|
||||
} curbe;
|
||||
|
||||
/** SAMPLER_STATE count and offset */
|
||||
struct {
|
||||
uint32_t offset;
|
||||
} sampler;
|
||||
|
||||
/**
|
||||
* Layout of vertex data exiting the geometry portion of the pipeline.
|
||||
* This comes from the geometry shader if one exists, otherwise from the
|
||||
@@ -1110,7 +1105,13 @@ struct brw_context
|
||||
uint32_t bind_bo_offset;
|
||||
uint32_t surf_offset[BRW_MAX_VS_SURFACES];
|
||||
|
||||
/** SAMPLER_STATE count and table offset */
|
||||
uint32_t sampler_count;
|
||||
uint32_t sampler_offset;
|
||||
|
||||
/** Offsets in the batch to sampler default colors (texture border color)
|
||||
*/
|
||||
uint32_t sdc_offset[BRW_MAX_TEX_UNIT];
|
||||
} vs;
|
||||
|
||||
struct {
|
||||
@@ -1153,10 +1154,6 @@ struct brw_context
|
||||
struct {
|
||||
struct brw_wm_prog_data *prog_data;
|
||||
|
||||
/** offsets in the batch to sampler default colors (texture border color)
|
||||
*/
|
||||
uint32_t sdc_offset[BRW_MAX_TEX_UNIT];
|
||||
|
||||
GLuint render_surf;
|
||||
|
||||
drm_intel_bo *scratch_bo;
|
||||
@@ -1184,7 +1181,13 @@ struct brw_context
|
||||
uint32_t bind_bo_offset;
|
||||
uint32_t surf_offset[BRW_MAX_WM_SURFACES];
|
||||
|
||||
/** SAMPLER_STATE count and table offset */
|
||||
uint32_t sampler_count;
|
||||
uint32_t sampler_offset;
|
||||
|
||||
/** Offsets in the batch to sampler default colors (texture border color)
|
||||
*/
|
||||
uint32_t sdc_offset[BRW_MAX_TEX_UNIT];
|
||||
|
||||
struct {
|
||||
struct ra_regs *regs;
|
||||
|
||||
@@ -157,12 +157,12 @@ brw_upload_vs_unit(struct brw_context *brw)
|
||||
*/
|
||||
if (brw->vs.sampler_count) {
|
||||
vs->vs5.sampler_state_pointer =
|
||||
(brw->batch.bo->offset + brw->sampler.offset) >> 5;
|
||||
(brw->batch.bo->offset + brw->vs.sampler_offset) >> 5;
|
||||
drm_intel_bo_emit_reloc(brw->batch.bo,
|
||||
brw->vs.state_offset +
|
||||
offsetof(struct brw_vs_unit_state, vs5),
|
||||
brw->batch.bo,
|
||||
brw->sampler.offset | vs->vs5.sampler_count,
|
||||
brw->vs.sampler_offset | vs->vs5.sampler_count,
|
||||
I915_GEM_DOMAIN_INSTRUCTION, 0);
|
||||
}
|
||||
|
||||
|
||||
@@ -195,6 +195,7 @@ static void brw_update_sampler_state(struct brw_context *brw,
|
||||
int unit,
|
||||
int ss_index,
|
||||
struct brw_sampler_state *sampler,
|
||||
uint32_t sampler_state_table_offset,
|
||||
uint32_t *sdc_offset)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
@@ -347,7 +348,7 @@ static void brw_update_sampler_state(struct brw_context *brw,
|
||||
*sdc_offset) >> 5;
|
||||
|
||||
drm_intel_bo_emit_reloc(brw->batch.bo,
|
||||
brw->sampler.offset +
|
||||
sampler_state_table_offset +
|
||||
ss_index * sizeof(struct brw_sampler_state) +
|
||||
offsetof(struct brw_sampler_state, ss2),
|
||||
brw->batch.bo, *sdc_offset,
|
||||
@@ -366,7 +367,10 @@ static void brw_update_sampler_state(struct brw_context *brw,
|
||||
|
||||
|
||||
static void
|
||||
brw_upload_samplers(struct brw_context *brw)
|
||||
brw_upload_sampler_state_table(struct brw_context *brw,
|
||||
uint32_t *sampler_count,
|
||||
uint32_t *sst_offset,
|
||||
uint32_t *sdc_offset)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
struct brw_sampler_state *samplers;
|
||||
@@ -380,17 +384,15 @@ brw_upload_samplers(struct brw_context *brw)
|
||||
/* ARB programs use the texture unit number as the sampler index, so we
|
||||
* need to find the highest unit used. A bit-count will not work.
|
||||
*/
|
||||
brw->wm.sampler_count = _mesa_fls(SamplersUsed);
|
||||
/* Currently we only use one sampler state table. Mirror the count. */
|
||||
brw->vs.sampler_count = brw->wm.sampler_count;
|
||||
*sampler_count = _mesa_fls(SamplersUsed);
|
||||
|
||||
if (brw->wm.sampler_count == 0)
|
||||
if (*sampler_count == 0)
|
||||
return;
|
||||
|
||||
samplers = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE,
|
||||
brw->wm.sampler_count * sizeof(*samplers),
|
||||
32, &brw->sampler.offset);
|
||||
memset(samplers, 0, brw->wm.sampler_count * sizeof(*samplers));
|
||||
*sampler_count * sizeof(*samplers),
|
||||
32, sst_offset);
|
||||
memset(samplers, 0, *sampler_count * sizeof(*samplers));
|
||||
|
||||
for (unsigned s = 0; s < brw->wm.sampler_count; s++) {
|
||||
if (SamplersUsed & (1 << s)) {
|
||||
@@ -398,13 +400,27 @@ brw_upload_samplers(struct brw_context *brw)
|
||||
fs->SamplerUnits[s] : vs->SamplerUnits[s];
|
||||
if (ctx->Texture.Unit[unit]._ReallyEnabled)
|
||||
brw_update_sampler_state(brw, unit, s, &samplers[s],
|
||||
&brw->wm.sdc_offset[s]);
|
||||
*sst_offset, &sdc_offset[s]);
|
||||
}
|
||||
}
|
||||
|
||||
brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
|
||||
}
|
||||
|
||||
static void
|
||||
brw_upload_samplers(struct brw_context *brw)
|
||||
{
|
||||
brw_upload_sampler_state_table(brw,
|
||||
&brw->wm.sampler_count,
|
||||
&brw->wm.sampler_offset,
|
||||
brw->wm.sdc_offset);
|
||||
|
||||
brw_upload_sampler_state_table(brw,
|
||||
&brw->vs.sampler_count,
|
||||
&brw->vs.sampler_offset,
|
||||
brw->vs.sdc_offset);
|
||||
}
|
||||
|
||||
const struct brw_tracked_state brw_samplers = {
|
||||
.dirty = {
|
||||
.mesa = _NEW_TEXTURE,
|
||||
|
||||
@@ -150,7 +150,7 @@ brw_upload_wm_unit(struct brw_context *brw)
|
||||
if (brw->wm.sampler_count) {
|
||||
/* reloc */
|
||||
wm->wm4.sampler_state_pointer = (brw->batch.bo->offset +
|
||||
brw->sampler.offset) >> 5;
|
||||
brw->wm.sampler_offset) >> 5;
|
||||
} else {
|
||||
wm->wm4.sampler_state_pointer = 0;
|
||||
}
|
||||
@@ -229,9 +229,9 @@ brw_upload_wm_unit(struct brw_context *brw)
|
||||
drm_intel_bo_emit_reloc(brw->batch.bo,
|
||||
brw->wm.state_offset +
|
||||
offsetof(struct brw_wm_unit_state, wm4),
|
||||
brw->batch.bo, (brw->sampler.offset |
|
||||
wm->wm4.stats_enable |
|
||||
(wm->wm4.sampler_count << 2)),
|
||||
brw->batch.bo, (brw->wm.sampler_offset |
|
||||
wm->wm4.stats_enable |
|
||||
(wm->wm4.sampler_count << 2)),
|
||||
I915_GEM_DOMAIN_INSTRUCTION, 0);
|
||||
}
|
||||
|
||||
|
||||
@@ -39,9 +39,9 @@ upload_sampler_state_pointers(struct brw_context *brw)
|
||||
GS_SAMPLER_STATE_CHANGE |
|
||||
PS_SAMPLER_STATE_CHANGE |
|
||||
(4 - 2));
|
||||
OUT_BATCH(brw->sampler.offset); /* VS */
|
||||
OUT_BATCH(brw->vs.sampler_offset); /* VS */
|
||||
OUT_BATCH(0); /* GS */
|
||||
OUT_BATCH(brw->sampler.offset);
|
||||
OUT_BATCH(brw->wm.sampler_offset);
|
||||
ADVANCE_BATCH();
|
||||
}
|
||||
|
||||
|
||||
@@ -185,7 +185,10 @@ gen7_update_sampler_state(struct brw_context *brw, int unit, int ss_index,
|
||||
|
||||
|
||||
static void
|
||||
gen7_upload_samplers(struct brw_context *brw)
|
||||
gen7_upload_sampler_state_table(struct brw_context *brw,
|
||||
uint32_t *sampler_count,
|
||||
uint32_t *sst_offset,
|
||||
uint32_t *sdc_offset)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
struct gen7_sampler_state *samplers;
|
||||
@@ -196,17 +199,15 @@ gen7_upload_samplers(struct brw_context *brw)
|
||||
|
||||
GLbitfield SamplersUsed = vs->SamplersUsed | fs->SamplersUsed;
|
||||
|
||||
brw->wm.sampler_count = _mesa_fls(SamplersUsed);
|
||||
/* Currently we only use one sampler state table. Mirror the count. */
|
||||
brw->vs.sampler_count = brw->wm.sampler_count;
|
||||
*sampler_count = _mesa_fls(SamplersUsed);
|
||||
|
||||
if (brw->wm.sampler_count == 0)
|
||||
if (*sampler_count == 0)
|
||||
return;
|
||||
|
||||
samplers = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE,
|
||||
brw->wm.sampler_count * sizeof(*samplers),
|
||||
32, &brw->sampler.offset);
|
||||
memset(samplers, 0, brw->wm.sampler_count * sizeof(*samplers));
|
||||
*sampler_count * sizeof(*samplers),
|
||||
32, sst_offset);
|
||||
memset(samplers, 0, *sampler_count * sizeof(*samplers));
|
||||
|
||||
for (unsigned s = 0; s < brw->wm.sampler_count; s++) {
|
||||
if (SamplersUsed & (1 << s)) {
|
||||
@@ -214,13 +215,27 @@ gen7_upload_samplers(struct brw_context *brw)
|
||||
fs->SamplerUnits[s] : vs->SamplerUnits[s];
|
||||
if (ctx->Texture.Unit[unit]._ReallyEnabled)
|
||||
gen7_update_sampler_state(brw, unit, s, &samplers[s],
|
||||
&brw->wm.sdc_offset[s]);
|
||||
&sdc_offset[s]);
|
||||
}
|
||||
}
|
||||
|
||||
brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
|
||||
}
|
||||
|
||||
static void
|
||||
gen7_upload_samplers(struct brw_context *brw)
|
||||
{
|
||||
gen7_upload_sampler_state_table(brw,
|
||||
&brw->wm.sampler_count,
|
||||
&brw->wm.sampler_offset,
|
||||
brw->wm.sdc_offset);
|
||||
|
||||
gen7_upload_sampler_state_table(brw,
|
||||
&brw->vs.sampler_count,
|
||||
&brw->vs.sampler_offset,
|
||||
brw->vs.sdc_offset);
|
||||
}
|
||||
|
||||
const struct brw_tracked_state gen7_samplers = {
|
||||
.dirty = {
|
||||
.mesa = _NEW_TEXTURE,
|
||||
|
||||
@@ -48,7 +48,7 @@ upload_vs_state(struct brw_context *brw)
|
||||
/* CACHE_NEW_SAMPLER */
|
||||
BEGIN_BATCH(2);
|
||||
OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_VS << 16 | (2 - 2));
|
||||
OUT_BATCH(brw->sampler.offset);
|
||||
OUT_BATCH(brw->vs.sampler_offset);
|
||||
ADVANCE_BATCH();
|
||||
|
||||
if (brw->vs.push_const_size == 0) {
|
||||
|
||||
@@ -125,7 +125,7 @@ upload_ps_state(struct brw_context *brw)
|
||||
/* CACHE_NEW_SAMPLER */
|
||||
BEGIN_BATCH(2);
|
||||
OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2));
|
||||
OUT_BATCH(brw->sampler.offset);
|
||||
OUT_BATCH(brw->wm.sampler_offset);
|
||||
ADVANCE_BATCH();
|
||||
|
||||
/* CACHE_NEW_WM_PROG */
|
||||
|
||||
Reference in New Issue
Block a user