i965: Tell the unit states how many binding table entries we have.
Before the series containing commit 3c9dc2d31b, which dynamically assigns
our binding table indices, we didn't really track our binding table entry
count per shader, so we never filled in these fields.
Reduces cairo-gl trace runtime by 2.47953% +/- 1.07281% (n=20).
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
@@ -79,7 +79,8 @@ brw_upload_vs_unit(struct brw_context *brw)
|
||||
*/
|
||||
vs->thread1.single_program_flow = (brw->gen == 5);
|
||||
|
||||
vs->thread1.binding_table_entry_count = 0;
|
||||
vs->thread1.binding_table_entry_count =
|
||||
brw->vs.prog_data->base.base.binding_table.size_bytes / 4;
|
||||
|
||||
if (brw->vs.prog_data->base.total_scratch != 0) {
|
||||
vs->thread2.scratch_space_base_pointer =
|
||||
|
||||
@@ -120,7 +120,8 @@ brw_upload_wm_unit(struct brw_context *brw)
|
||||
else
|
||||
wm->thread1.floating_point_mode = BRW_FLOATING_POINT_IEEE_754;
|
||||
|
||||
wm->thread1.binding_table_entry_count = 0;
|
||||
wm->thread1.binding_table_entry_count =
|
||||
brw->wm.prog_data->base.binding_table.size_bytes / 4;
|
||||
|
||||
if (brw->wm.prog_data->total_scratch != 0) {
|
||||
wm->thread2.scratch_space_base_pointer =
|
||||
|
||||
@@ -165,7 +165,9 @@ upload_vs_state(struct brw_context *brw)
|
||||
OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
|
||||
OUT_BATCH(stage_state->prog_offset);
|
||||
OUT_BATCH(floating_point_mode |
|
||||
((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT));
|
||||
((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT) |
|
||||
((brw->vs.prog_data->base.base.binding_table.size_bytes / 4) <<
|
||||
GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
|
||||
|
||||
if (brw->vs.prog_data->base.total_scratch) {
|
||||
OUT_RELOC(stage_state->scratch_bo,
|
||||
|
||||
@@ -146,6 +146,11 @@ upload_wm_state(struct brw_context *brw)
|
||||
/* CACHE_NEW_SAMPLER */
|
||||
dw2 |= (ALIGN(brw->wm.base.sampler_count, 4) / 4) <<
|
||||
GEN6_WM_SAMPLER_COUNT_SHIFT;
|
||||
|
||||
/* CACHE_NEW_WM_PROG */
|
||||
dw2 |= ((brw->wm.prog_data->base.binding_table.size_bytes / 4) <<
|
||||
GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
|
||||
|
||||
dw4 |= (brw->wm.prog_data->first_curbe_grf <<
|
||||
GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
|
||||
dw4 |= (brw->wm.prog_data->first_curbe_grf_16 <<
|
||||
|
||||
@@ -85,7 +85,9 @@ upload_gs_state(struct brw_context *brw)
|
||||
OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
|
||||
OUT_BATCH(stage_state->prog_offset);
|
||||
OUT_BATCH(((ALIGN(stage_state->sampler_count, 4)/4) <<
|
||||
GEN6_GS_SAMPLER_COUNT_SHIFT));
|
||||
GEN6_GS_SAMPLER_COUNT_SHIFT) |
|
||||
((brw->gs.prog_data->base.base.binding_table.size_bytes / 4) <<
|
||||
GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
|
||||
|
||||
if (brw->gs.prog_data->base.total_scratch) {
|
||||
OUT_RELOC(stage_state->scratch_bo,
|
||||
|
||||
@@ -100,7 +100,9 @@ upload_vs_state(struct brw_context *brw)
|
||||
OUT_BATCH(stage_state->prog_offset);
|
||||
OUT_BATCH(floating_point_mode |
|
||||
((ALIGN(stage_state->sampler_count, 4)/4) <<
|
||||
GEN6_VS_SAMPLER_COUNT_SHIFT));
|
||||
GEN6_VS_SAMPLER_COUNT_SHIFT) |
|
||||
((brw->vs.prog_data->base.base.binding_table.size_bytes / 4) <<
|
||||
GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
|
||||
|
||||
if (brw->vs.prog_data->base.total_scratch) {
|
||||
OUT_RELOC(stage_state->scratch_bo,
|
||||
|
||||
@@ -160,6 +160,10 @@ upload_ps_state(struct brw_context *brw)
|
||||
dw2 |=
|
||||
(ALIGN(brw->wm.base.sampler_count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT;
|
||||
|
||||
/* CACHE_NEW_WM_PROG */
|
||||
dw2 |= ((brw->wm.prog_data->base.binding_table.size_bytes / 4) <<
|
||||
GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
|
||||
|
||||
/* Use ALT floating point mode for ARB fragment programs, because they
|
||||
* require 0^0 == 1. Even though _CurrentFragmentProgram is used for
|
||||
* rendering, CurrentFragmentProgram is used for this check to
|
||||
|
||||
Reference in New Issue
Block a user