i965: Shrink Gen5 VUE map layout to be the same as Gen4.
The PRM suggests a larger layout, mostly to support having
gl_ClipDistance[] somewhere predictable for the fixed-function clipper
-- but it didn't actually arrive in Gen5.
Just use the same layout for both Gen4 and Gen5.
No Piglit regressions.
Improves performance in CS:S Video Stress Test by ~3%.
V2: - Remove now-useless function for determining the SF URB read offset
- Remove now-unused BRW_VARYING_SLOT_POS_DUPLICATE
Signed-off-by: Chris Forbes <chrisf@ijw.co.nz>
Reviewed-by: Paul Berry <stereotype441@gmail.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
@@ -333,7 +333,6 @@ struct brw_wm_prog_data {
|
||||
typedef enum
|
||||
{
|
||||
BRW_VARYING_SLOT_NDC = VARYING_SLOT_MAX,
|
||||
BRW_VARYING_SLOT_POS_DUPLICATE,
|
||||
BRW_VARYING_SLOT_PAD,
|
||||
/**
|
||||
* Technically this is not a varying but just a placeholder that
|
||||
|
||||
@@ -76,7 +76,7 @@ static void compile_sf_prog( struct brw_context *brw,
|
||||
c.vue_map.varying_to_slot[BRW_VARYING_SLOT_PNTC] = c.vue_map.num_slots;
|
||||
c.vue_map.slot_to_varying[c.vue_map.num_slots++] = BRW_VARYING_SLOT_PNTC;
|
||||
}
|
||||
c.urb_entry_read_offset = brw_sf_compute_urb_entry_read_offset(intel);
|
||||
c.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
|
||||
c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset;
|
||||
c.nr_setup_regs = c.nr_attr_regs;
|
||||
|
||||
|
||||
@@ -103,6 +103,7 @@ void brw_emit_line_setup( struct brw_sf_compile *c, bool allocate );
|
||||
void brw_emit_point_setup( struct brw_sf_compile *c, bool allocate );
|
||||
void brw_emit_point_sprite_setup( struct brw_sf_compile *c, bool allocate );
|
||||
void brw_emit_anyprim_setup( struct brw_sf_compile *c );
|
||||
int brw_sf_compute_urb_entry_read_offset(struct intel_context *intel);
|
||||
|
||||
#define BRW_SF_URB_ENTRY_READ_OFFSET 1
|
||||
|
||||
#endif
|
||||
|
||||
@@ -124,19 +124,6 @@ const struct brw_tracked_state brw_sf_vp = {
|
||||
.emit = upload_sf_vp
|
||||
};
|
||||
|
||||
/**
|
||||
* Compute the offset within the URB (expressed in 256-bit register
|
||||
* increments) that should be used to read the VUE in the fragment shader.
|
||||
*/
|
||||
int
|
||||
brw_sf_compute_urb_entry_read_offset(struct intel_context *intel)
|
||||
{
|
||||
if (intel->gen == 5)
|
||||
return 3;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void upload_sf_unit( struct brw_context *brw )
|
||||
{
|
||||
struct intel_context *intel = &brw->intel;
|
||||
@@ -163,9 +150,7 @@ static void upload_sf_unit( struct brw_context *brw )
|
||||
sf->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
|
||||
|
||||
sf->thread3.dispatch_grf_start_reg = 3;
|
||||
|
||||
sf->thread3.urb_entry_read_offset =
|
||||
brw_sf_compute_urb_entry_read_offset(intel);
|
||||
sf->thread3.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
|
||||
|
||||
/* CACHE_NEW_SF_PROG */
|
||||
sf->thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length;
|
||||
|
||||
@@ -2746,7 +2746,6 @@ vec4_visitor::emit_urb_slot(int mrf, int varying)
|
||||
current_annotation = "NDC";
|
||||
emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])));
|
||||
break;
|
||||
case BRW_VARYING_SLOT_POS_DUPLICATE:
|
||||
case VARYING_SLOT_POS:
|
||||
current_annotation = "gl_Position";
|
||||
emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])));
|
||||
|
||||
@@ -85,34 +85,17 @@ brw_compute_vue_map(struct brw_context *brw, struct brw_vue_map *vue_map,
|
||||
*/
|
||||
switch (intel->gen) {
|
||||
case 4:
|
||||
case 5:
|
||||
/* There are 8 dwords in VUE header pre-Ironlake:
|
||||
* dword 0-3 is indices, point width, clip flags.
|
||||
* dword 4-7 is ndc position
|
||||
* dword 8-11 is the first vertex data.
|
||||
*/
|
||||
assign_vue_slot(vue_map, VARYING_SLOT_PSIZ);
|
||||
assign_vue_slot(vue_map, BRW_VARYING_SLOT_NDC);
|
||||
assign_vue_slot(vue_map, VARYING_SLOT_POS);
|
||||
break;
|
||||
case 5:
|
||||
/* There are 20 DWs (D0-D19) in VUE header on Ironlake:
|
||||
* dword 0-3 of the header is indices, point width, clip flags.
|
||||
* dword 4-7 is the ndc position
|
||||
* dword 8-11 of the vertex header is the 4D space position
|
||||
* dword 12-19 of the vertex header is the user clip distance.
|
||||
* dword 20-23 is a pad so that the vertex element data is aligned
|
||||
* dword 24-27 is the first vertex data we fill.
|
||||
*
|
||||
* Note: future pipeline stages expect 4D space position to be
|
||||
* contiguous with the other varyings, so we make dword 24-27 a
|
||||
* duplicate copy of the 4D space position.
|
||||
* On Ironlake the VUE header is nominally 20 dwords, but the hardware
|
||||
* will accept the same header layout as Gen4 [and should be a bit faster]
|
||||
*/
|
||||
assign_vue_slot(vue_map, VARYING_SLOT_PSIZ);
|
||||
assign_vue_slot(vue_map, BRW_VARYING_SLOT_NDC);
|
||||
assign_vue_slot(vue_map, BRW_VARYING_SLOT_POS_DUPLICATE);
|
||||
assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST0);
|
||||
assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST1);
|
||||
assign_vue_slot(vue_map, BRW_VARYING_SLOT_PAD);
|
||||
assign_vue_slot(vue_map, VARYING_SLOT_POS);
|
||||
break;
|
||||
case 6:
|
||||
|
||||
Reference in New Issue
Block a user