From 5b8ec015f27e879438216f20198e907419ee2f13 Mon Sep 17 00:00:00 2001 From: Caio Oliveira Date: Tue, 21 Nov 2023 09:47:18 -0800 Subject: [PATCH] intel/compiler: Don't use fs_visitor::bld in remaining places The remaining users can simply create a new builder at_end() if needed. In many places a new builder object is already being constructed, so just give more specific instructions. Reviewed-by: Kenneth Graunke Part-of: --- src/intel/compiler/brw_fs.cpp | 37 +++++++++---------- .../compiler/brw_fs_combine_constants.cpp | 2 +- src/intel/compiler/brw_fs_nir.cpp | 1 + src/intel/compiler/brw_fs_visitor.cpp | 10 ++++- .../compiler/brw_lower_logical_sends.cpp | 2 +- 5 files changed, 29 insertions(+), 23 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 60b1f969433..1e740a697ec 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -1231,7 +1231,7 @@ fs_visitor::emit_gs_thread_end() emit_gs_control_data_bits(this->final_gs_vertex_count); } - const fs_builder abld = bld.annotate("thread end"); + const fs_builder abld = fs_builder(this, dispatch_width).at_end().annotate("thread end"); fs_inst *inst; if (gs_prog_data->static_vertex_count != -1) { @@ -1285,7 +1285,7 @@ fs_visitor::assign_curb_setup() assert(uniform_push_length <= reg_unit(devinfo)); } else if (is_compute && devinfo->verx10 >= 125) { assert(devinfo->has_lsc); - fs_builder ubld = bld.exec_all().group(1, 0).at( + fs_builder ubld = fs_builder(this, 1).exec_all().at( cfg->first_block(), cfg->first_block()->start()); /* The base offset for our push data is passed in as R0.0[31:6]. We have @@ -1382,7 +1382,7 @@ fs_visitor::assign_curb_setup() uint64_t want_zero = used & stage_prog_data->zero_push_reg; if (want_zero) { - fs_builder ubld = bld.exec_all().group(8, 0).at( + fs_builder ubld = fs_builder(this, 8).exec_all().at( cfg->first_block(), cfg->first_block()->start()); /* push_reg_mask_param is in 32-bit units */ @@ -3307,6 +3307,7 @@ fs_visitor::emit_repclear_shader() BRW_VERTICAL_STRIDE_8, BRW_WIDTH_2, BRW_HORIZONTAL_STRIDE_4, BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + const fs_builder bld = fs_builder(this, dispatch_width).at_end(); bld.exec_all().group(4, 0).MOV(color_output, color_input); if (key->nr_color_regions > 1) { @@ -5310,6 +5311,8 @@ fs_visitor::lower_simd_width() * we're sure that both cases can be handled. */ const unsigned max_width = MAX2(inst->exec_size, lower_width); + + const fs_builder bld = fs_builder(this, dispatch_width).at_end(); const fs_builder ibld = bld.at(block, inst) .exec_all(inst->force_writemask_all) .group(max_width, inst->group / max_width); @@ -5584,7 +5587,7 @@ fs_visitor::lower_find_live_channel() if (!inst->is_partial_write()) ibld.emit_undef_for_dst(inst); - const fs_builder ubld = bld.at(block, inst).exec_all().group(1, 0); + const fs_builder ubld = fs_builder(this, block, inst).exec_all().group(1, 0); /* ce0 doesn't consider the thread dispatch mask (DMask or VMask), * so combine the execution and dispatch masks to obtain the true mask. @@ -5946,19 +5949,6 @@ fs_visitor::optimize() /* Start by validating the shader we currently have. */ validate(); - /* bld is the common builder object pointing at the end of the program we - * used to translate it into i965 IR. For the optimization and lowering - * passes coming next, any code added after the end of the program without - * having explicitly called fs_builder::at() clearly points at a mistake. - * Ideally optimization passes wouldn't be part of the visitor so they - * wouldn't have access to bld at all, but they do, so just in case some - * pass forgets to ask for a location explicitly set it to NULL here to - * make it trip. The dispatch width is initialized to a bogus value to - * make sure that optimizations set the execution controls explicitly to - * match the code they are manipulating instead of relying on the defaults. - */ - bld = fs_builder(this, 64); - bool progress = false; int iteration = 0; int pass_num = 0; @@ -6138,7 +6128,7 @@ fs_visitor::fixup_sends_duplicate_payload() /* Sadly, we've lost all notion of channels and bit sizes at this * point. Just WE_all it. */ - const fs_builder ibld = bld.at(block, inst).exec_all().group(16, 0); + const fs_builder ibld = fs_builder(this, block, inst).exec_all().group(16, 0); fs_reg copy_src = retype(inst->src[3], BRW_REGISTER_TYPE_UD); fs_reg copy_dst = tmp; for (unsigned i = 0; i < inst->ex_mlen; i += 2) { @@ -6242,8 +6232,8 @@ fs_visitor::emit_dummy_mov_instruction() /* Insert dummy mov as first instruction. */ const fs_builder ubld = - bld.at(cfg->first_block(), first_inst).exec_all().group(8, 0); - ubld.MOV(bld.null_reg_ud(), brw_imm_ud(0u)); + fs_builder(this, cfg->first_block(), (fs_inst *)first_inst).exec_all().group(8, 0); + ubld.MOV(ubld.null_reg_ud(), brw_imm_ud(0u)); invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES); } @@ -6700,6 +6690,7 @@ fs_visitor::set_tcs_invocation_id() { struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data); struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base; + const fs_builder bld = fs_builder(this, dispatch_width).at_end(); const unsigned instance_id_mask = (devinfo->verx10 >= 125) ? INTEL_MASK(7, 0) : @@ -6751,6 +6742,8 @@ fs_visitor::emit_tcs_thread_end() if (devinfo->ver != 8 && mark_last_urb_write_with_eot()) return; + const fs_builder bld = fs_builder(this, dispatch_width).at_end(); + /* Emit a URB write to end the thread. On Broadwell, we use this to write * zero to the "TR DS Cache Disable" bit (we haven't implemented a fancy * algorithm to set it optimally). On other platforms, we simply write @@ -6772,6 +6765,7 @@ fs_visitor::run_tcs() assert(stage == MESA_SHADER_TESS_CTRL); struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data); + const fs_builder bld = fs_builder(this, dispatch_width).at_end(); assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_SINGLE_PATCH || vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_MULTI_PATCH); @@ -6871,6 +6865,7 @@ fs_visitor::run_gs() * Otherwise, we need to initialize it to 0 here. */ if (gs_compile->control_data_header_size_bits <= 32) { + const fs_builder bld = fs_builder(this, dispatch_width).at_end(); const fs_builder abld = bld.annotate("initialize control data bits"); abld.MOV(this->control_data_bits, brw_imm_ud(0u)); } @@ -6933,6 +6928,7 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send) { struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(this->prog_data); brw_wm_prog_key *wm_key = (brw_wm_prog_key *) this->key; + const fs_builder bld = fs_builder(this, dispatch_width).at_end(); assert(stage == MESA_SHADER_FRAGMENT); @@ -7008,6 +7004,7 @@ fs_visitor::run_cs(bool allow_spilling) { assert(gl_shader_stage_is_compute(stage)); assert(devinfo->ver >= 7); + const fs_builder bld = fs_builder(this, dispatch_width).at_end(); payload_ = new cs_thread_payload(*this); diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp b/src/intel/compiler/brw_fs_combine_constants.cpp index 0dd8769f6bb..b16a95dfbdf 100644 --- a/src/intel/compiler/brw_fs_combine_constants.cpp +++ b/src/intel/compiler/brw_fs_combine_constants.cpp @@ -1660,7 +1660,7 @@ fs_visitor::opt_combine_constants() * both HF slots within a DWord with the constant. */ const uint32_t width = devinfo->ver == 8 && imm->is_half_float ? 2 : 1; - const fs_builder ibld = bld.at(insert_block, n).exec_all().group(width, 0); + const fs_builder ibld = fs_builder(this, width).at(insert_block, n).exec_all(); fs_reg reg(VGRF, imm->nr); reg.offset = imm->subreg_offset; diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 570793830dc..d0dbfd30d33 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -2218,6 +2218,7 @@ fs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count) struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(prog_data); + const fs_builder bld = fs_builder(this, dispatch_width).at_end(); const fs_builder abld = bld.annotate("emit control data bits"); const fs_builder fwa_bld = bld.exec_all(); diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index 5fd51bad80d..4f344f64ddd 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -94,7 +94,7 @@ fs_visitor::emit_interpolation_setup_gfx4() { struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); - fs_builder abld = bld.annotate("compute pixel centers"); + fs_builder abld = fs_builder(this, dispatch_width).at_end().annotate("compute pixel centers"); this->pixel_x = vgrf(glsl_type::uint_type); this->pixel_y = vgrf(glsl_type::uint_type); this->pixel_x.type = BRW_REGISTER_TYPE_UW; @@ -106,6 +106,7 @@ fs_visitor::emit_interpolation_setup_gfx4() fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), fs_reg(brw_imm_v(0x11001100))); + const fs_builder bld = fs_builder(this, dispatch_width).at_end(); abld = bld.annotate("compute pixel deltas from v0"); this->delta_xy[BRW_BARYCENTRIC_PERSPECTIVE_PIXEL] = @@ -151,6 +152,7 @@ fs_visitor::emit_interpolation_setup_gfx4() void fs_visitor::emit_interpolation_setup_gfx6() { + const fs_builder bld = fs_builder(this, dispatch_width).at_end(); fs_builder abld = bld.annotate("compute pixel centers"); this->pixel_x = vgrf(glsl_type::float_type); @@ -603,6 +605,7 @@ fs_visitor::emit_alpha_test() { assert(stage == MESA_SHADER_FRAGMENT); brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; + const fs_builder bld = fs_builder(this, dispatch_width).at_end(); const fs_builder abld = bld.annotate("Alpha test"); fs_inst *cmp; @@ -676,6 +679,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld, void fs_visitor::do_emit_fb_writes(int nr_color_regions, bool replicate_alpha) { + const fs_builder bld = fs_builder(this, dispatch_width).at_end(); fs_inst *inst = NULL; for (int target = 0; target < nr_color_regions; target++) { @@ -810,6 +814,8 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count) unreachable("invalid stage"); } + const fs_builder bld = fs_builder(this, dispatch_width).at_end(); + fs_reg per_slot_offsets; if (stage == MESA_SHADER_GEOMETRY) { @@ -1084,6 +1090,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count) void fs_visitor::emit_urb_fence() { + const fs_builder bld = fs_builder(this, dispatch_width).at_end(); fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD); fs_inst *fence = bld.emit(SHADER_OPCODE_MEMORY_FENCE, dst, brw_vec8_grf(0, 0), @@ -1103,6 +1110,7 @@ void fs_visitor::emit_cs_terminate() { assert(devinfo->ver >= 7); + const fs_builder bld = fs_builder(this, dispatch_width).at_end(); /* We can't directly send from g0, since sends with EOT have to use * g112-127. So, copy it to a virtual register, The register allocator will diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index d0e0b0d1121..1613d117b02 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -3304,7 +3304,7 @@ fs_visitor::lower_uniform_pull_constant_loads() invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES); } else if (devinfo->ver >= 7) { const fs_builder ubld = fs_builder(this, block, inst).exec_all(); - fs_reg header = bld.exec_all().group(8, 0).vgrf(BRW_REGISTER_TYPE_UD); + fs_reg header = fs_builder(this, 8).exec_all().vgrf(BRW_REGISTER_TYPE_UD); ubld.group(8, 0).MOV(header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));