intel/elk: Lower load_frag_coord to load_{pixel_coord,frag_coord_z/w} in NIR.
This moves some conversions to NIR that may get eliminated, and also distinguishes gl_FragCoord.z/w loads at the shader info level so we don't need to flag uses_src_depth/uses_src_w when only gl_FragCoord.xy get used (as is typical). This reduces thread payload setup on many shaders.

Also, interestingly, blorp shaders stop reserving space for z/w despite not putting them in the payload (since PS_EXTRA isn't filled out for z/w).

HSW shader-db is noise:

total instructions in shared programs: 9942649 -> 9942997 (<.01%)
instructions in affected programs: 143167 -> 143515 (0.24%)
total cycles in shared programs: 314768862 -> 314299112 (-0.15%)
cycles in affected programs: 62951452 -> 62481702 (-0.75%)
LOST: 44
GAINED: 26

Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25190>
This commit is contained in:
@@ -1045,7 +1045,7 @@ emit:
|
||||
const struct shader_info *fs_info =
|
||||
crocus_get_shader_info(ice, MESA_SHADER_FRAGMENT);
|
||||
|
||||
if (BITSET_TEST(fs_info->system_values_read, SYSTEM_VALUE_FRAG_COORD)) {
|
||||
if (BITSET_TEST(fs_info->system_values_read, SYSTEM_VALUE_FRAG_COORD_Z)) {
|
||||
ice->state.global_depth_offset_clamp = 0;
|
||||
crocus_emit_cmd(batch, GENX(3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP), clamp);
|
||||
}
|
||||
|
||||
@@ -1551,7 +1551,6 @@ region_alignment_rules(const struct elk_isa_info *isa,
|
||||
* float destination type as well. We emit such instructions from
|
||||
*
|
||||
* elk_fs_visitor::emit_interpolation_setup_gfx6
|
||||
* elk_fs_visitor::emit_fragcoord_interpolation
|
||||
*
|
||||
* and have for years with no ill effects.
|
||||
*
|
||||
|
||||
@@ -6265,7 +6265,9 @@ elk_fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
|
||||
emit_repclear_shader();
|
||||
} else {
|
||||
if (nir->info.inputs_read > 0 ||
|
||||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) ||
|
||||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PIXEL_COORD) ||
|
||||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD_Z) ||
|
||||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD_W) ||
|
||||
(nir->info.outputs_read > 0 && !wm_key->coherent_fb_fetch)) {
|
||||
if (devinfo->ver < 6)
|
||||
emit_interpolation_setup_gfx4();
|
||||
@@ -6648,9 +6650,9 @@ elk_nir_populate_wm_prog_data(nir_shader *shader,
|
||||
prog_data->uses_vmask = true;
|
||||
|
||||
prog_data->uses_src_w =
|
||||
BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD);
|
||||
BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD_W);
|
||||
prog_data->uses_src_depth =
|
||||
BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD);
|
||||
BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD_Z);
|
||||
|
||||
calculate_urb_setup(devinfo, key, prog_data, shader);
|
||||
elk_compute_flat_inputs(prog_data, shader);
|
||||
|
||||
@@ -389,6 +389,8 @@ public:
|
||||
bool source_depth_to_render_target;
|
||||
bool runtime_check_aads_emit;
|
||||
|
||||
elk_fs_reg uw_pixel_x;
|
||||
elk_fs_reg uw_pixel_y;
|
||||
elk_fs_reg pixel_x;
|
||||
elk_fs_reg pixel_y;
|
||||
elk_fs_reg pixel_z;
|
||||
|
||||
@@ -3317,37 +3317,6 @@ emit_is_helper_invocation(nir_to_elk_state &ntb, elk_fs_reg result)
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
emit_fragcoord_interpolation(nir_to_elk_state &ntb, elk_fs_reg wpos)
|
||||
{
|
||||
const intel_device_info *devinfo = ntb.devinfo;
|
||||
const fs_builder &bld = ntb.bld;
|
||||
elk_fs_visitor &s = ntb.s;
|
||||
|
||||
assert(s.stage == MESA_SHADER_FRAGMENT);
|
||||
|
||||
/* gl_FragCoord.x */
|
||||
bld.MOV(wpos, s.pixel_x);
|
||||
wpos = offset(wpos, bld, 1);
|
||||
|
||||
/* gl_FragCoord.y */
|
||||
bld.MOV(wpos, s.pixel_y);
|
||||
wpos = offset(wpos, bld, 1);
|
||||
|
||||
/* gl_FragCoord.z */
|
||||
if (devinfo->ver >= 6) {
|
||||
bld.MOV(wpos, s.pixel_z);
|
||||
} else {
|
||||
bld.emit(ELK_FS_OPCODE_LINTERP, wpos,
|
||||
s.delta_xy[ELK_BARYCENTRIC_PERSPECTIVE_PIXEL],
|
||||
s.interp_reg(bld, VARYING_SLOT_POS, 2, 0));
|
||||
}
|
||||
wpos = offset(wpos, bld, 1);
|
||||
|
||||
/* gl_FragCoord.w: Already set up in emit_interpolation */
|
||||
bld.MOV(wpos, s.wpos_w);
|
||||
}
|
||||
|
||||
static elk_fs_reg
|
||||
emit_frontfacing_interpolation(nir_to_elk_state &ntb)
|
||||
{
|
||||
@@ -3939,7 +3908,29 @@ fs_nir_emit_fs_intrinsic(nir_to_elk_state &ntb,
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_frag_coord:
|
||||
emit_fragcoord_interpolation(ntb, dest);
|
||||
unreachable("should be lowered by elk_nir_lower_frag_coord");
|
||||
|
||||
case nir_intrinsic_load_pixel_coord:
|
||||
/* gl_FragCoord.xy: Just load the pixel xy from the payload, or more
|
||||
* complicated emit_interpolation_setup_gfx6 setup
|
||||
*/
|
||||
dest = retype(dest, ELK_REGISTER_TYPE_UW);
|
||||
bld.MOV(dest, s.uw_pixel_x);
|
||||
bld.MOV(offset(dest, bld, 1), s.uw_pixel_y);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_frag_coord_z:
|
||||
if (devinfo->ver >= 6) {
|
||||
bld.MOV(dest, s.pixel_z);
|
||||
} else {
|
||||
bld.emit(ELK_FS_OPCODE_LINTERP, dest,
|
||||
s.delta_xy[ELK_BARYCENTRIC_PERSPECTIVE_PIXEL],
|
||||
s.interp_reg(bld, VARYING_SLOT_POS, 2, 0));
|
||||
}
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_frag_coord_w:
|
||||
bld.MOV(dest, s.wpos_w);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_interpolated_input: {
|
||||
|
||||
@@ -216,8 +216,8 @@ elk_fs_visitor::emit_interpolation_setup_gfx6()
|
||||
elk_fs_reg half_int_pixel_offset_x = half_int_sample_offset_x;
|
||||
elk_fs_reg half_int_pixel_offset_y = half_int_sample_offset_y;
|
||||
|
||||
elk_fs_reg uw_pixel_x = abld.vgrf(ELK_REGISTER_TYPE_UW);
|
||||
elk_fs_reg uw_pixel_y = abld.vgrf(ELK_REGISTER_TYPE_UW);
|
||||
uw_pixel_x = abld.vgrf(ELK_REGISTER_TYPE_UW);
|
||||
uw_pixel_y = abld.vgrf(ELK_REGISTER_TYPE_UW);
|
||||
|
||||
for (unsigned i = 0; i < DIV_ROUND_UP(dispatch_width, 16); i++) {
|
||||
const fs_builder hbld = abld.group(MIN2(16, dispatch_width), i);
|
||||
|
||||
@@ -1374,6 +1374,11 @@ elk_postprocess_nir(nir_shader *nir, const struct elk_compiler *compiler,
|
||||
|
||||
OPT(intel_nir_lower_sparse_intrinsics);
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
/* This needs to run late, after lower_wpos_center and lower_input_attachments. */
|
||||
OPT(nir_lower_frag_coord_to_pixel_coord);
|
||||
}
|
||||
|
||||
OPT(nir_lower_bit_size, lower_bit_size_callback, (void *)compiler);
|
||||
|
||||
OPT(nir_opt_combine_barriers, combine_all_memory_barriers, NULL);
|
||||
|
||||
Reference in New Issue
Block a user