From 1134cdc1982dfb48858927a135349a29178dac9e Mon Sep 17 00:00:00 2001 From: Emma Anholt Date: Fri, 21 Feb 2025 10:42:29 -0800 Subject: [PATCH] intel/elk: Lower load_frag_coord to load_{pixel_coord,frag_coord_z/w} in NIR. This moves some conversions to NIR that may get eliminated, and also distinguishes gl_FragCoord.z/w loads at the shader info level so we don't need to flag uses_src_depth/uses_src_w when only gl_FragCoord.xy get used (as is typical). This reduces thread payload setup on many shaders. Also, interestingly, blorp shaders stop reserving space for z/w despite not putting them in the payload (since PS_EXTRA isn't filled out for z/w). HSW shader-db is noise: total instructions in shared programs: 9942649 -> 9942997 (<.01%) instructions in affected programs: 143167 -> 143515 (0.24%) total cycles in shared programs: 314768862 -> 314299112 (-0.15%) cycles in affected programs: 62951452 -> 62481702 (-0.75%) LOST: 44 GAINED: 26 Reviewed-by: Alyssa Rosenzweig Part-of: --- src/gallium/drivers/crocus/crocus_state.c | 2 +- src/intel/compiler/elk/elk_eu_validate.c | 1 - src/intel/compiler/elk/elk_fs.cpp | 8 ++-- src/intel/compiler/elk/elk_fs.h | 2 + src/intel/compiler/elk/elk_fs_nir.cpp | 55 ++++++++++------------- src/intel/compiler/elk/elk_fs_visitor.cpp | 4 +- src/intel/compiler/elk/elk_nir.c | 5 +++ 7 files changed, 38 insertions(+), 39 deletions(-) diff --git a/src/gallium/drivers/crocus/crocus_state.c b/src/gallium/drivers/crocus/crocus_state.c index 6614bc30bfe..4bfb9ab4eab 100644 --- a/src/gallium/drivers/crocus/crocus_state.c +++ b/src/gallium/drivers/crocus/crocus_state.c @@ -1045,7 +1045,7 @@ emit: const struct shader_info *fs_info = crocus_get_shader_info(ice, MESA_SHADER_FRAGMENT); - if (BITSET_TEST(fs_info->system_values_read, SYSTEM_VALUE_FRAG_COORD)) { + if (BITSET_TEST(fs_info->system_values_read, SYSTEM_VALUE_FRAG_COORD_Z)) { ice->state.global_depth_offset_clamp = 0; crocus_emit_cmd(batch, GENX(3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP), clamp); } diff --git a/src/intel/compiler/elk/elk_eu_validate.c b/src/intel/compiler/elk/elk_eu_validate.c index 8a53fc4e91b..a4453f2ad6b 100644 --- a/src/intel/compiler/elk/elk_eu_validate.c +++ b/src/intel/compiler/elk/elk_eu_validate.c @@ -1551,7 +1551,6 @@ region_alignment_rules(const struct elk_isa_info *isa, * float destination type as well. We emit such instructions from * * elk_fs_visitor::emit_interpolation_setup_gfx6 - * elk_fs_visitor::emit_fragcoord_interpolation * * and have for years with no ill effects. * diff --git a/src/intel/compiler/elk/elk_fs.cpp b/src/intel/compiler/elk/elk_fs.cpp index c1781e8e541..6e24c509701 100644 --- a/src/intel/compiler/elk/elk_fs.cpp +++ b/src/intel/compiler/elk/elk_fs.cpp @@ -6265,7 +6265,9 @@ elk_fs_visitor::run_fs(bool allow_spilling, bool do_rep_send) emit_repclear_shader(); } else { if (nir->info.inputs_read > 0 || - BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) || + BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PIXEL_COORD) || + BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD_Z) || + BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD_W) || (nir->info.outputs_read > 0 && !wm_key->coherent_fb_fetch)) { if (devinfo->ver < 6) emit_interpolation_setup_gfx4(); @@ -6648,9 +6650,9 @@ elk_nir_populate_wm_prog_data(nir_shader *shader, prog_data->uses_vmask = true; prog_data->uses_src_w = - BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD); + BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD_W); prog_data->uses_src_depth = - BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD); + BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD_Z); calculate_urb_setup(devinfo, key, prog_data, shader); elk_compute_flat_inputs(prog_data, shader); diff --git a/src/intel/compiler/elk/elk_fs.h b/src/intel/compiler/elk/elk_fs.h index c1c6f5ec47a..e9b8f7847f1 100644 --- a/src/intel/compiler/elk/elk_fs.h +++ b/src/intel/compiler/elk/elk_fs.h @@ -389,6 +389,8 @@ public: bool source_depth_to_render_target; bool runtime_check_aads_emit; + elk_fs_reg uw_pixel_x; + elk_fs_reg uw_pixel_y; elk_fs_reg pixel_x; elk_fs_reg pixel_y; elk_fs_reg pixel_z; diff --git a/src/intel/compiler/elk/elk_fs_nir.cpp b/src/intel/compiler/elk/elk_fs_nir.cpp index a6cb321a2be..2fac136e546 100644 --- a/src/intel/compiler/elk/elk_fs_nir.cpp +++ b/src/intel/compiler/elk/elk_fs_nir.cpp @@ -3317,37 +3317,6 @@ emit_is_helper_invocation(nir_to_elk_state &ntb, elk_fs_reg result) } } -static void -emit_fragcoord_interpolation(nir_to_elk_state &ntb, elk_fs_reg wpos) -{ - const intel_device_info *devinfo = ntb.devinfo; - const fs_builder &bld = ntb.bld; - elk_fs_visitor &s = ntb.s; - - assert(s.stage == MESA_SHADER_FRAGMENT); - - /* gl_FragCoord.x */ - bld.MOV(wpos, s.pixel_x); - wpos = offset(wpos, bld, 1); - - /* gl_FragCoord.y */ - bld.MOV(wpos, s.pixel_y); - wpos = offset(wpos, bld, 1); - - /* gl_FragCoord.z */ - if (devinfo->ver >= 6) { - bld.MOV(wpos, s.pixel_z); - } else { - bld.emit(ELK_FS_OPCODE_LINTERP, wpos, - s.delta_xy[ELK_BARYCENTRIC_PERSPECTIVE_PIXEL], - s.interp_reg(bld, VARYING_SLOT_POS, 2, 0)); - } - wpos = offset(wpos, bld, 1); - - /* gl_FragCoord.w: Already set up in emit_interpolation */ - bld.MOV(wpos, s.wpos_w); -} - static elk_fs_reg emit_frontfacing_interpolation(nir_to_elk_state &ntb) { @@ -3939,7 +3908,29 @@ fs_nir_emit_fs_intrinsic(nir_to_elk_state &ntb, } case nir_intrinsic_load_frag_coord: - emit_fragcoord_interpolation(ntb, dest); + unreachable("should be lowered by elk_nir_lower_frag_coord"); + + case nir_intrinsic_load_pixel_coord: + /* gl_FragCoord.xy: Just load the pixel xy from the payload, or more + * complicated emit_interpolation_setup_gfx6 setup + */ + dest = retype(dest, ELK_REGISTER_TYPE_UW); + bld.MOV(dest, s.uw_pixel_x); + bld.MOV(offset(dest, bld, 1), s.uw_pixel_y); + break; + + case nir_intrinsic_load_frag_coord_z: + if (devinfo->ver >= 6) { + bld.MOV(dest, s.pixel_z); + } else { + bld.emit(ELK_FS_OPCODE_LINTERP, dest, + s.delta_xy[ELK_BARYCENTRIC_PERSPECTIVE_PIXEL], + s.interp_reg(bld, VARYING_SLOT_POS, 2, 0)); + } + break; + + case nir_intrinsic_load_frag_coord_w: + bld.MOV(dest, s.wpos_w); break; case nir_intrinsic_load_interpolated_input: { diff --git a/src/intel/compiler/elk/elk_fs_visitor.cpp b/src/intel/compiler/elk/elk_fs_visitor.cpp index f745cf12896..032a8d94485 100644 --- a/src/intel/compiler/elk/elk_fs_visitor.cpp +++ b/src/intel/compiler/elk/elk_fs_visitor.cpp @@ -216,8 +216,8 @@ elk_fs_visitor::emit_interpolation_setup_gfx6() elk_fs_reg half_int_pixel_offset_x = half_int_sample_offset_x; elk_fs_reg half_int_pixel_offset_y = half_int_sample_offset_y; - elk_fs_reg uw_pixel_x = abld.vgrf(ELK_REGISTER_TYPE_UW); - elk_fs_reg uw_pixel_y = abld.vgrf(ELK_REGISTER_TYPE_UW); + uw_pixel_x = abld.vgrf(ELK_REGISTER_TYPE_UW); + uw_pixel_y = abld.vgrf(ELK_REGISTER_TYPE_UW); for (unsigned i = 0; i < DIV_ROUND_UP(dispatch_width, 16); i++) { const fs_builder hbld = abld.group(MIN2(16, dispatch_width), i); diff --git a/src/intel/compiler/elk/elk_nir.c b/src/intel/compiler/elk/elk_nir.c index 9326fd82fd8..67cc8fd9150 100644 --- a/src/intel/compiler/elk/elk_nir.c +++ b/src/intel/compiler/elk/elk_nir.c @@ -1374,6 +1374,11 @@ elk_postprocess_nir(nir_shader *nir, const struct elk_compiler *compiler, OPT(intel_nir_lower_sparse_intrinsics); + if (nir->info.stage == MESA_SHADER_FRAGMENT) { + /* This needs to run late, after lower_wpos_center and lower_input_attachments. */ + OPT(nir_lower_frag_coord_to_pixel_coord); + } + OPT(nir_lower_bit_size, lower_bit_size_callback, (void *)compiler); OPT(nir_opt_combine_barriers, combine_all_memory_barriers, NULL);