From 88f1656133589fba9bbe4e4114df881173eddab8 Mon Sep 17 00:00:00 2001 From: Emma Anholt Date: Fri, 21 Feb 2025 10:28:16 -0800 Subject: [PATCH] intel/elk: Save the UW pixel x/y as a temp. This will be used for representing gl_FragCoord in NIR and reducing payload registers pushed. Part-of: --- src/intel/compiler/brw_compile_fs.cpp | 48 +++++++++++++++++++++------ 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/src/intel/compiler/brw_compile_fs.cpp b/src/intel/compiler/brw_compile_fs.cpp index 6b2c0d231ec..3099ea6be66 100644 --- a/src/intel/compiler/brw_compile_fs.cpp +++ b/src/intel/compiler/brw_compile_fs.cpp @@ -347,6 +347,9 @@ brw_emit_interpolation_setup(brw_shader &s) break; } + brw_reg uw_pixel_x = abld.vgrf(BRW_TYPE_UW); + brw_reg uw_pixel_y = abld.vgrf(BRW_TYPE_UW); + for (unsigned i = 0; i < DIV_ROUND_UP(s.dispatch_width, 16); i++) { const brw_builder hbld = abld.group(MIN2(16, s.dispatch_width), i); /* According to the "PS Thread Payload for Normal Dispatch" @@ -359,32 +362,54 @@ brw_emit_interpolation_setup(brw_shader &s) brw_vec1_grf(i + 1, 0); const struct brw_reg gi_uw = retype(gi_reg, BRW_TYPE_UW); + brw_reg int_pixel_x = offset(uw_pixel_x, hbld, i); + brw_reg int_pixel_y = offset(uw_pixel_y, hbld, i); + if (devinfo->verx10 >= 125) { + /* We compute two sets of int pixel x/y: one with a 2 byte stride for + * future load_pixel_coord, and one with a 4 byte stride to meet + * regioning restrictions for the add into a float result that + * implements the current load_frag_coord. + */ const brw_builder dbld = abld.exec_all().group(hbld.dispatch_width() * 2, 0); - const brw_reg int_pixel_x = dbld.vgrf(BRW_TYPE_UW); - const brw_reg int_pixel_y = dbld.vgrf(BRW_TYPE_UW); + const brw_reg int_pixel_x_4b = dbld.vgrf(BRW_TYPE_UW); + const brw_reg int_pixel_y_4b = dbld.vgrf(BRW_TYPE_UW); - dbld.ADD(int_pixel_x, + hbld.ADD(int_pixel_x, brw_reg(stride(suboffset(gi_uw, 4), 2, 8, 0)), int_pixel_offset_x); - dbld.ADD(int_pixel_y, + hbld.ADD(int_pixel_y, + brw_reg(stride(suboffset(gi_uw, 5), 2, 8, 0)), + int_pixel_offset_y); + dbld.ADD(int_pixel_x_4b, + brw_reg(stride(suboffset(gi_uw, 4), 2, 8, 0)), + int_pixel_offset_x); + dbld.ADD(int_pixel_y_4b, brw_reg(stride(suboffset(gi_uw, 5), 2, 8, 0)), int_pixel_offset_y); if (wm_prog_data->coarse_pixel_dispatch != INTEL_NEVER) { - brw_inst *addx = dbld.ADD(int_pixel_x, int_pixel_x, + brw_inst *addx = hbld.ADD(int_pixel_x, int_pixel_x, horiz_stride(half_int_pixel_offset_x, 0)); - brw_inst *addy = dbld.ADD(int_pixel_y, int_pixel_y, + brw_inst *addy = hbld.ADD(int_pixel_y, int_pixel_y, horiz_stride(half_int_pixel_offset_y, 0)); if (wm_prog_data->coarse_pixel_dispatch != INTEL_ALWAYS) { addx->predicate = BRW_PREDICATE_NORMAL; addy->predicate = BRW_PREDICATE_NORMAL; } + addx = dbld.ADD(int_pixel_x_4b, int_pixel_x_4b, + horiz_stride(half_int_pixel_offset_x, 0)); + addy = dbld.ADD(int_pixel_y_4b, int_pixel_y_4b, + horiz_stride(half_int_pixel_offset_y, 0)); + if (wm_prog_data->coarse_pixel_dispatch != INTEL_ALWAYS) { + addx->predicate = BRW_PREDICATE_NORMAL; + addy->predicate = BRW_PREDICATE_NORMAL; + } } - hbld.MOV(offset(s.pixel_x, hbld, i), horiz_stride(int_pixel_x, 2)); - hbld.MOV(offset(s.pixel_y, hbld, i), horiz_stride(int_pixel_y, 2)); + hbld.MOV(offset(s.pixel_x, hbld, i), horiz_stride(int_pixel_x_4b, 2)); + hbld.MOV(offset(s.pixel_y, hbld, i), horiz_stride(int_pixel_y_4b, 2)); } else { /* The "Register Region Restrictions" page says for BDW (and newer, @@ -405,10 +430,13 @@ brw_emit_interpolation_setup(brw_shader &s) brw_reg(stride(suboffset(gi_uw, 4), 1, 4, 0)), int_pixel_offset_xy); - hbld.emit(FS_OPCODE_PIXEL_X, offset(s.pixel_x, hbld, i), int_pixel_xy, + hbld.emit(FS_OPCODE_PIXEL_X, int_pixel_x, int_pixel_xy, horiz_stride(half_int_pixel_offset_x, 0)); - hbld.emit(FS_OPCODE_PIXEL_Y, offset(s.pixel_y, hbld, i), int_pixel_xy, + hbld.emit(FS_OPCODE_PIXEL_Y, int_pixel_y, int_pixel_xy, horiz_stride(half_int_pixel_offset_y, 0)); + + hbld.MOV(offset(s.pixel_x, hbld, i), int_pixel_x); + hbld.MOV(offset(s.pixel_y, hbld, i), int_pixel_y); } }