From 5b8ec015f27e879438216f20198e907419ee2f13 Mon Sep 17 00:00:00 2001
From: Caio Oliveira <caio.oliveira@intel.com>
Date: Tue, 21 Nov 2023 09:47:18 -0800
Subject: [PATCH] intel/compiler: Don't use fs_visitor::bld in remaining places

The remaining users can simply create a new builder positioned at_end()
when they need one.  In many places a new builder object was already
being derived from bld, so construct it directly with the dispatch
width and/or insertion point it needs instead.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26323>
---
 src/intel/compiler/brw_fs.cpp                 | 37 +++++++++----------
 .../compiler/brw_fs_combine_constants.cpp     |  2 +-
 src/intel/compiler/brw_fs_nir.cpp             |  1 +
 src/intel/compiler/brw_fs_visitor.cpp         | 10 ++++-
 .../compiler/brw_lower_logical_sends.cpp      |  2 +-
 5 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 60b1f969433..1e740a697ec 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -1231,7 +1231,7 @@ fs_visitor::emit_gs_thread_end()
       emit_gs_control_data_bits(this->final_gs_vertex_count);
    }
 
-   const fs_builder abld = bld.annotate("thread end");
+   const fs_builder abld = fs_builder(this, dispatch_width).at_end().annotate("thread end");
    fs_inst *inst;
 
    if (gs_prog_data->static_vertex_count != -1) {
@@ -1285,7 +1285,7 @@ fs_visitor::assign_curb_setup()
       assert(uniform_push_length <= reg_unit(devinfo));
    } else if (is_compute && devinfo->verx10 >= 125) {
       assert(devinfo->has_lsc);
-      fs_builder ubld = bld.exec_all().group(1, 0).at(
+      fs_builder ubld = fs_builder(this, 1).exec_all().at(
          cfg->first_block(), cfg->first_block()->start());
 
       /* The base offset for our push data is passed in as R0.0[31:6]. We have
@@ -1382,7 +1382,7 @@ fs_visitor::assign_curb_setup()
 
    uint64_t want_zero = used & stage_prog_data->zero_push_reg;
    if (want_zero) {
-      fs_builder ubld = bld.exec_all().group(8, 0).at(
+      fs_builder ubld = fs_builder(this, 8).exec_all().at(
          cfg->first_block(), cfg->first_block()->start());
 
       /* push_reg_mask_param is in 32-bit units */
@@ -3307,6 +3307,7 @@ fs_visitor::emit_repclear_shader()
               BRW_VERTICAL_STRIDE_8, BRW_WIDTH_2, BRW_HORIZONTAL_STRIDE_4,
               BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
 
+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();
    bld.exec_all().group(4, 0).MOV(color_output, color_input);
 
    if (key->nr_color_regions > 1) {
@@ -5310,6 +5311,8 @@ fs_visitor::lower_simd_width()
           * we're sure that both cases can be handled.
           */
          const unsigned max_width = MAX2(inst->exec_size, lower_width);
+
+         const fs_builder bld = fs_builder(this, dispatch_width).at_end();
          const fs_builder ibld = bld.at(block, inst)
                                     .exec_all(inst->force_writemask_all)
                                     .group(max_width, inst->group / max_width);
@@ -5584,7 +5587,7 @@ fs_visitor::lower_find_live_channel()
       if (!inst->is_partial_write())
          ibld.emit_undef_for_dst(inst);
 
-      const fs_builder ubld = bld.at(block, inst).exec_all().group(1, 0);
+      const fs_builder ubld = fs_builder(this, block, inst).exec_all().group(1, 0);
 
       /* ce0 doesn't consider the thread dispatch mask (DMask or VMask),
        * so combine the execution and dispatch masks to obtain the true mask.
@@ -5946,19 +5949,6 @@ fs_visitor::optimize()
    /* Start by validating the shader we currently have. */
    validate();
 
-   /* bld is the common builder object pointing at the end of the program we
-    * used to translate it into i965 IR.  For the optimization and lowering
-    * passes coming next, any code added after the end of the program without
-    * having explicitly called fs_builder::at() clearly points at a mistake.
-    * Ideally optimization passes wouldn't be part of the visitor so they
-    * wouldn't have access to bld at all, but they do, so just in case some
-    * pass forgets to ask for a location explicitly set it to NULL here to
-    * make it trip.  The dispatch width is initialized to a bogus value to
-    * make sure that optimizations set the execution controls explicitly to
-    * match the code they are manipulating instead of relying on the defaults.
-    */
-   bld = fs_builder(this, 64);
-
    bool progress = false;
    int iteration = 0;
    int pass_num = 0;
@@ -6138,7 +6128,7 @@ fs_visitor::fixup_sends_duplicate_payload()
          /* Sadly, we've lost all notion of channels and bit sizes at this
           * point.  Just WE_all it.
           */
-         const fs_builder ibld = bld.at(block, inst).exec_all().group(16, 0);
+         const fs_builder ibld = fs_builder(this, block, inst).exec_all().group(16, 0);
          fs_reg copy_src = retype(inst->src[3], BRW_REGISTER_TYPE_UD);
          fs_reg copy_dst = tmp;
          for (unsigned i = 0; i < inst->ex_mlen; i += 2) {
@@ -6242,8 +6232,8 @@ fs_visitor::emit_dummy_mov_instruction()
 
    /* Insert dummy mov as first instruction. */
    const fs_builder ubld =
-      bld.at(cfg->first_block(), first_inst).exec_all().group(8, 0);
-   ubld.MOV(bld.null_reg_ud(), brw_imm_ud(0u));
+      fs_builder(this, cfg->first_block(), (fs_inst *)first_inst).exec_all().group(8, 0);
+   ubld.MOV(ubld.null_reg_ud(), brw_imm_ud(0u));
 
    invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
 }
@@ -6700,6 +6690,7 @@ fs_visitor::set_tcs_invocation_id()
 {
    struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data);
    struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();
 
    const unsigned instance_id_mask =
       (devinfo->verx10 >= 125) ? INTEL_MASK(7, 0) :
@@ -6751,6 +6742,8 @@ fs_visitor::emit_tcs_thread_end()
    if (devinfo->ver != 8 && mark_last_urb_write_with_eot())
       return;
 
+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();
+
    /* Emit a URB write to end the thread.  On Broadwell, we use this to write
     * zero to the "TR DS Cache Disable" bit (we haven't implemented a fancy
     * algorithm to set it optimally).  On other platforms, we simply write
@@ -6772,6 +6765,7 @@ fs_visitor::run_tcs()
    assert(stage == MESA_SHADER_TESS_CTRL);
 
    struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data);
+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();
 
    assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_SINGLE_PATCH ||
           vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_MULTI_PATCH);
@@ -6871,6 +6865,7 @@ fs_visitor::run_gs()
        * Otherwise, we need to initialize it to 0 here.
        */
       if (gs_compile->control_data_header_size_bits <= 32) {
+         const fs_builder bld = fs_builder(this, dispatch_width).at_end();
          const fs_builder abld = bld.annotate("initialize control data bits");
          abld.MOV(this->control_data_bits, brw_imm_ud(0u));
       }
@@ -6933,6 +6928,7 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
 {
    struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(this->prog_data);
    brw_wm_prog_key *wm_key = (brw_wm_prog_key *) this->key;
+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();
 
    assert(stage == MESA_SHADER_FRAGMENT);
 
@@ -7008,6 +7004,7 @@ fs_visitor::run_cs(bool allow_spilling)
 {
    assert(gl_shader_stage_is_compute(stage));
    assert(devinfo->ver >= 7);
+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();
 
    payload_ = new cs_thread_payload(*this);
 
diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp b/src/intel/compiler/brw_fs_combine_constants.cpp
index 0dd8769f6bb..b16a95dfbdf 100644
--- a/src/intel/compiler/brw_fs_combine_constants.cpp
+++ b/src/intel/compiler/brw_fs_combine_constants.cpp
@@ -1660,7 +1660,7 @@ fs_visitor::opt_combine_constants()
        * both HF slots within a DWord with the constant.
        */
       const uint32_t width = devinfo->ver == 8 && imm->is_half_float ? 2 : 1;
-      const fs_builder ibld = bld.at(insert_block, n).exec_all().group(width, 0);
+      const fs_builder ibld = fs_builder(this, width).at(insert_block, n).exec_all();
 
       fs_reg reg(VGRF, imm->nr);
       reg.offset = imm->subreg_offset;
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 570793830dc..d0dbfd30d33 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -2218,6 +2218,7 @@ fs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count)
 
    struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(prog_data);
 
+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();
    const fs_builder abld = bld.annotate("emit control data bits");
    const fs_builder fwa_bld = bld.exec_all();
 
diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp
index 5fd51bad80d..4f344f64ddd 100644
--- a/src/intel/compiler/brw_fs_visitor.cpp
+++ b/src/intel/compiler/brw_fs_visitor.cpp
@@ -94,7 +94,7 @@ fs_visitor::emit_interpolation_setup_gfx4()
 {
    struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
 
-   fs_builder abld = bld.annotate("compute pixel centers");
+   fs_builder abld = fs_builder(this, dispatch_width).at_end().annotate("compute pixel centers");
    this->pixel_x = vgrf(glsl_type::uint_type);
    this->pixel_y = vgrf(glsl_type::uint_type);
    this->pixel_x.type = BRW_REGISTER_TYPE_UW;
@@ -106,6 +106,7 @@ fs_visitor::emit_interpolation_setup_gfx4()
             fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
             fs_reg(brw_imm_v(0x11001100)));
 
+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();
    abld = bld.annotate("compute pixel deltas from v0");
 
    this->delta_xy[BRW_BARYCENTRIC_PERSPECTIVE_PIXEL] =
@@ -151,6 +152,7 @@ fs_visitor::emit_interpolation_setup_gfx4()
 void
 fs_visitor::emit_interpolation_setup_gfx6()
 {
+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();
    fs_builder abld = bld.annotate("compute pixel centers");
 
    this->pixel_x = vgrf(glsl_type::float_type);
@@ -603,6 +605,7 @@ fs_visitor::emit_alpha_test()
 {
    assert(stage == MESA_SHADER_FRAGMENT);
    brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();
    const fs_builder abld = bld.annotate("Alpha test");
 
    fs_inst *cmp;
@@ -676,6 +679,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
 void
 fs_visitor::do_emit_fb_writes(int nr_color_regions, bool replicate_alpha)
 {
+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();
    fs_inst *inst = NULL;
 
    for (int target = 0; target < nr_color_regions; target++) {
@@ -810,6 +814,8 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
       unreachable("invalid stage");
    }
 
+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();
+
    fs_reg per_slot_offsets;
 
    if (stage == MESA_SHADER_GEOMETRY) {
@@ -1084,6 +1090,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
 void
 fs_visitor::emit_urb_fence()
 {
+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();
    fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD);
    fs_inst *fence = bld.emit(SHADER_OPCODE_MEMORY_FENCE, dst,
                              brw_vec8_grf(0, 0),
@@ -1103,6 +1110,7 @@ void
 fs_visitor::emit_cs_terminate()
 {
    assert(devinfo->ver >= 7);
+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();
 
    /* We can't directly send from g0, since sends with EOT have to use
     * g112-127. So, copy it to a virtual register, The register allocator will
diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp
index d0e0b0d1121..1613d117b02 100644
--- a/src/intel/compiler/brw_lower_logical_sends.cpp
+++ b/src/intel/compiler/brw_lower_logical_sends.cpp
@@ -3304,7 +3304,7 @@ fs_visitor::lower_uniform_pull_constant_loads()
          invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
       } else if (devinfo->ver >= 7) {
          const fs_builder ubld = fs_builder(this, block, inst).exec_all();
-         fs_reg header = bld.exec_all().group(8, 0).vgrf(BRW_REGISTER_TYPE_UD);
+         fs_reg header = fs_builder(this, 8).exec_all().vgrf(BRW_REGISTER_TYPE_UD);
 
          ubld.group(8, 0).MOV(header,
                               retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));