intel/elk: Lower load_frag_coord to load_{pixel_coord,frag_coord_z/w} in NIR.

This moves some conversions into NIR, where they may get eliminated, and
also distinguishes gl_FragCoord.z/w loads at the shader info level so we
don't need to flag uses_src_depth/uses_src_w when only gl_FragCoord.xy is
used (as is typical).  This reduces thread payload setup on many shaders.
Also, interestingly, blorp shaders stop reserving space for z/w despite
not putting them in the payload (since PS_EXTRA isn't filled out for z/w).

HSW shader-db is noise:

total instructions in shared programs: 9942649 -> 9942997 (<.01%)
instructions in affected programs: 143167 -> 143515 (0.24%)

total cycles in shared programs: 314768862 -> 314299112 (-0.15%)
cycles in affected programs: 62951452 -> 62481702 (-0.75%)

LOST:   44
GAINED: 26

Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25190>
This commit is contained in:
Emma Anholt
2025-02-21 10:42:29 -08:00
committed by Marge Bot
parent 88f1656133
commit 1134cdc198
7 changed files with 38 additions and 39 deletions
+1 -1
View File
@@ -1045,7 +1045,7 @@ emit:
const struct shader_info *fs_info =
crocus_get_shader_info(ice, MESA_SHADER_FRAGMENT);
if (BITSET_TEST(fs_info->system_values_read, SYSTEM_VALUE_FRAG_COORD)) {
if (BITSET_TEST(fs_info->system_values_read, SYSTEM_VALUE_FRAG_COORD_Z)) {
ice->state.global_depth_offset_clamp = 0;
crocus_emit_cmd(batch, GENX(3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP), clamp);
}
-1
View File
@@ -1551,7 +1551,6 @@ region_alignment_rules(const struct elk_isa_info *isa,
* float destination type as well. We emit such instructions from
*
* elk_fs_visitor::emit_interpolation_setup_gfx6
* elk_fs_visitor::emit_fragcoord_interpolation
*
* and have for years with no ill effects.
*
+5 -3
View File
@@ -6265,7 +6265,9 @@ elk_fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
emit_repclear_shader();
} else {
if (nir->info.inputs_read > 0 ||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) ||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PIXEL_COORD) ||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD_Z) ||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD_W) ||
(nir->info.outputs_read > 0 && !wm_key->coherent_fb_fetch)) {
if (devinfo->ver < 6)
emit_interpolation_setup_gfx4();
@@ -6648,9 +6650,9 @@ elk_nir_populate_wm_prog_data(nir_shader *shader,
prog_data->uses_vmask = true;
prog_data->uses_src_w =
BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD);
BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD_W);
prog_data->uses_src_depth =
BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD);
BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD_Z);
calculate_urb_setup(devinfo, key, prog_data, shader);
elk_compute_flat_inputs(prog_data, shader);
+2
View File
@@ -389,6 +389,8 @@ public:
bool source_depth_to_render_target;
bool runtime_check_aads_emit;
elk_fs_reg uw_pixel_x;
elk_fs_reg uw_pixel_y;
elk_fs_reg pixel_x;
elk_fs_reg pixel_y;
elk_fs_reg pixel_z;
+23 -32
View File
@@ -3317,37 +3317,6 @@ emit_is_helper_invocation(nir_to_elk_state &ntb, elk_fs_reg result)
}
}
/* Emit the instructions that write gl_FragCoord.xyzw into wpos, one
 * component at a time.  Only valid for fragment shaders (asserted below).
 *
 * x and y are copied from the visitor's pixel_x / pixel_y and w from
 * wpos_w.  z is copied from pixel_z when devinfo->ver >= 6; otherwise it
 * is produced by an ELK_FS_OPCODE_LINTERP using the perspective-pixel
 * barycentric deltas.
 *
 * ntb:  NIR-to-ELK translation state supplying devinfo, builder and visitor.
 * wpos: destination register; passed by value, so the stepping done here
 *       is not visible to the caller.
 */
static void
emit_fragcoord_interpolation(nir_to_elk_state &ntb, elk_fs_reg wpos)
{
const intel_device_info *devinfo = ntb.devinfo;
const fs_builder &bld = ntb.bld;
elk_fs_visitor &s = ntb.s;
assert(s.stage == MESA_SHADER_FRAGMENT);
/* gl_FragCoord.x */
bld.MOV(wpos, s.pixel_x);
/* Step the local wpos to the next destination component. */
wpos = offset(wpos, bld, 1);
/* gl_FragCoord.y */
bld.MOV(wpos, s.pixel_y);
wpos = offset(wpos, bld, 1);
/* gl_FragCoord.z */
if (devinfo->ver >= 6) {
bld.MOV(wpos, s.pixel_z);
} else {
/* Pre-ver-6 path: interpolate instead of copying pixel_z.
 * NOTE(review): presumably the "2" selects the z component of
 * VARYING_SLOT_POS -- confirm against interp_reg().
 */
bld.emit(ELK_FS_OPCODE_LINTERP, wpos,
s.delta_xy[ELK_BARYCENTRIC_PERSPECTIVE_PIXEL],
s.interp_reg(bld, VARYING_SLOT_POS, 2, 0));
}
wpos = offset(wpos, bld, 1);
/* gl_FragCoord.w: Already set up in emit_interpolation */
bld.MOV(wpos, s.wpos_w);
}
static elk_fs_reg
emit_frontfacing_interpolation(nir_to_elk_state &ntb)
{
@@ -3939,7 +3908,29 @@ fs_nir_emit_fs_intrinsic(nir_to_elk_state &ntb,
}
case nir_intrinsic_load_frag_coord:
emit_fragcoord_interpolation(ntb, dest);
unreachable("should be lowered by elk_nir_lower_frag_coord");
case nir_intrinsic_load_pixel_coord:
/* gl_FragCoord.xy: Copy the UW pixel x/y values.  These come either
* straight from the payload or from the more complicated setup done in
* emit_interpolation_setup_gfx6.
*/
dest = retype(dest, ELK_REGISTER_TYPE_UW);
bld.MOV(dest, s.uw_pixel_x);
bld.MOV(offset(dest, bld, 1), s.uw_pixel_y);
break;
case nir_intrinsic_load_frag_coord_z:
if (devinfo->ver >= 6) {
bld.MOV(dest, s.pixel_z);
} else {
bld.emit(ELK_FS_OPCODE_LINTERP, dest,
s.delta_xy[ELK_BARYCENTRIC_PERSPECTIVE_PIXEL],
s.interp_reg(bld, VARYING_SLOT_POS, 2, 0));
}
break;
case nir_intrinsic_load_frag_coord_w:
bld.MOV(dest, s.wpos_w);
break;
case nir_intrinsic_load_interpolated_input: {
+2 -2
View File
@@ -216,8 +216,8 @@ elk_fs_visitor::emit_interpolation_setup_gfx6()
elk_fs_reg half_int_pixel_offset_x = half_int_sample_offset_x;
elk_fs_reg half_int_pixel_offset_y = half_int_sample_offset_y;
elk_fs_reg uw_pixel_x = abld.vgrf(ELK_REGISTER_TYPE_UW);
elk_fs_reg uw_pixel_y = abld.vgrf(ELK_REGISTER_TYPE_UW);
uw_pixel_x = abld.vgrf(ELK_REGISTER_TYPE_UW);
uw_pixel_y = abld.vgrf(ELK_REGISTER_TYPE_UW);
for (unsigned i = 0; i < DIV_ROUND_UP(dispatch_width, 16); i++) {
const fs_builder hbld = abld.group(MIN2(16, dispatch_width), i);
+5
View File
@@ -1374,6 +1374,11 @@ elk_postprocess_nir(nir_shader *nir, const struct elk_compiler *compiler,
OPT(intel_nir_lower_sparse_intrinsics);
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
/* This needs to run late, after lower_wpos_center and lower_input_attachments. */
OPT(nir_lower_frag_coord_to_pixel_coord);
}
OPT(nir_lower_bit_size, lower_bit_size_callback, (void *)compiler);
OPT(nir_opt_combine_barriers, combine_all_memory_barriers, NULL);