intel/elk: Save the UW pixel x/y as a temp.

This will be used for representing gl_FragCoord in NIR and reducing
payload registers pushed.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25190>
This commit is contained in:
Emma Anholt
2025-02-21 10:28:16 -08:00
committed by Marge Bot
parent 5222c35924
commit 88f1656133

View File

@@ -347,6 +347,9 @@ brw_emit_interpolation_setup(brw_shader &s)
break;
}
brw_reg uw_pixel_x = abld.vgrf(BRW_TYPE_UW);
brw_reg uw_pixel_y = abld.vgrf(BRW_TYPE_UW);
for (unsigned i = 0; i < DIV_ROUND_UP(s.dispatch_width, 16); i++) {
const brw_builder hbld = abld.group(MIN2(16, s.dispatch_width), i);
/* According to the "PS Thread Payload for Normal Dispatch"
@@ -359,32 +362,54 @@ brw_emit_interpolation_setup(brw_shader &s)
brw_vec1_grf(i + 1, 0);
const struct brw_reg gi_uw = retype(gi_reg, BRW_TYPE_UW);
brw_reg int_pixel_x = offset(uw_pixel_x, hbld, i);
brw_reg int_pixel_y = offset(uw_pixel_y, hbld, i);
if (devinfo->verx10 >= 125) {
/* We compute two sets of int pixel x/y: one with a 2 byte stride for
* future load_pixel_coord, and one with a 4 byte stride to meet
* regioning restrictions for the add into a float result that
* implements the current load_frag_coord.
*/
const brw_builder dbld =
abld.exec_all().group(hbld.dispatch_width() * 2, 0);
const brw_reg int_pixel_x = dbld.vgrf(BRW_TYPE_UW);
const brw_reg int_pixel_y = dbld.vgrf(BRW_TYPE_UW);
const brw_reg int_pixel_x_4b = dbld.vgrf(BRW_TYPE_UW);
const brw_reg int_pixel_y_4b = dbld.vgrf(BRW_TYPE_UW);
dbld.ADD(int_pixel_x,
hbld.ADD(int_pixel_x,
brw_reg(stride(suboffset(gi_uw, 4), 2, 8, 0)),
int_pixel_offset_x);
dbld.ADD(int_pixel_y,
hbld.ADD(int_pixel_y,
brw_reg(stride(suboffset(gi_uw, 5), 2, 8, 0)),
int_pixel_offset_y);
dbld.ADD(int_pixel_x_4b,
brw_reg(stride(suboffset(gi_uw, 4), 2, 8, 0)),
int_pixel_offset_x);
dbld.ADD(int_pixel_y_4b,
brw_reg(stride(suboffset(gi_uw, 5), 2, 8, 0)),
int_pixel_offset_y);
if (wm_prog_data->coarse_pixel_dispatch != INTEL_NEVER) {
brw_inst *addx = dbld.ADD(int_pixel_x, int_pixel_x,
brw_inst *addx = hbld.ADD(int_pixel_x, int_pixel_x,
horiz_stride(half_int_pixel_offset_x, 0));
brw_inst *addy = dbld.ADD(int_pixel_y, int_pixel_y,
brw_inst *addy = hbld.ADD(int_pixel_y, int_pixel_y,
horiz_stride(half_int_pixel_offset_y, 0));
if (wm_prog_data->coarse_pixel_dispatch != INTEL_ALWAYS) {
addx->predicate = BRW_PREDICATE_NORMAL;
addy->predicate = BRW_PREDICATE_NORMAL;
}
addx = dbld.ADD(int_pixel_x_4b, int_pixel_x_4b,
horiz_stride(half_int_pixel_offset_x, 0));
addy = dbld.ADD(int_pixel_y_4b, int_pixel_y_4b,
horiz_stride(half_int_pixel_offset_y, 0));
if (wm_prog_data->coarse_pixel_dispatch != INTEL_ALWAYS) {
addx->predicate = BRW_PREDICATE_NORMAL;
addy->predicate = BRW_PREDICATE_NORMAL;
}
}
hbld.MOV(offset(s.pixel_x, hbld, i), horiz_stride(int_pixel_x, 2));
hbld.MOV(offset(s.pixel_y, hbld, i), horiz_stride(int_pixel_y, 2));
hbld.MOV(offset(s.pixel_x, hbld, i), horiz_stride(int_pixel_x_4b, 2));
hbld.MOV(offset(s.pixel_y, hbld, i), horiz_stride(int_pixel_y_4b, 2));
} else {
/* The "Register Region Restrictions" page says for BDW (and newer,
@@ -405,10 +430,13 @@ brw_emit_interpolation_setup(brw_shader &s)
brw_reg(stride(suboffset(gi_uw, 4), 1, 4, 0)),
int_pixel_offset_xy);
hbld.emit(FS_OPCODE_PIXEL_X, offset(s.pixel_x, hbld, i), int_pixel_xy,
hbld.emit(FS_OPCODE_PIXEL_X, int_pixel_x, int_pixel_xy,
horiz_stride(half_int_pixel_offset_x, 0));
hbld.emit(FS_OPCODE_PIXEL_Y, offset(s.pixel_y, hbld, i), int_pixel_xy,
hbld.emit(FS_OPCODE_PIXEL_Y, int_pixel_y, int_pixel_xy,
horiz_stride(half_int_pixel_offset_y, 0));
hbld.MOV(offset(s.pixel_x, hbld, i), int_pixel_x);
hbld.MOV(offset(s.pixel_y, hbld, i), int_pixel_y);
}
}