From 1c045c54b7014697e1fced6ec19ec982f7efa732 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pavel=20Ondra=C4=8Dka?=
Date: Thu, 1 Jun 2023 09:12:10 +0200
Subject: [PATCH] r300: remove unneeded 64bit and atomic lowering passes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Pavel Ondračka
Reviewed-by: Emma Anholt
Reviewed-by: Filip Gawin
Part-of:
---
 src/gallium/drivers/r300/compiler/nir_to_rc.c | 296 ------------------
 1 file changed, 296 deletions(-)

diff --git a/src/gallium/drivers/r300/compiler/nir_to_rc.c b/src/gallium/drivers/r300/compiler/nir_to_rc.c
index 12c4b6e481f..41c5673c906 100644
--- a/src/gallium/drivers/r300/compiler/nir_to_rc.c
+++ b/src/gallium/drivers/r300/compiler/nir_to_rc.c
@@ -235,20 +235,6 @@ ntr_src_as_uint(struct ntr_compile *c, nir_src src)
    return val;
 }
 
-static unsigned
-ntr_64bit_write_mask(unsigned write_mask)
-{
-   return ((write_mask & 1) ? 0x3 : 0) | ((write_mask & 2) ? 0xc : 0);
-}
-
-static struct ureg_src
-ntr_64bit_1f(struct ntr_compile *c)
-{
-   return ureg_imm4u(c->ureg,
-                     0x00000000, 0x3ff00000,
-                     0x00000000, 0x3ff00000);
-}
-
 /* Per-channel masks of def/use within the block, and the per-channel
  * livein/liveout for the block as a whole.
  */
@@ -1537,8 +1523,6 @@ ntr_emit_load_ubo(struct ntr_compile *c, nir_intrinsic_instr *instr)
    }
 
    int start_component = nir_intrinsic_component(instr);
-   if (bit_size == 64)
-      start_component *= 2;
 
    src = ntr_shift_by_frac(src, start_component,
                            instr->num_components * bit_size / 32);
@@ -2461,7 +2445,6 @@ ntr_optimize_nir(struct nir_shader *s, struct pipe_screen *screen,
       progress = false;
 
       NIR_PASS_V(s, nir_lower_vars_to_ssa);
-      NIR_PASS_V(s, nir_split_64bit_vec3_and_vec4);
 
       NIR_PASS(progress, s, nir_copy_prop);
       NIR_PASS(progress, s, nir_opt_algebraic);
@@ -2516,212 +2499,6 @@ ntr_optimize_nir(struct nir_shader *s, struct pipe_screen *screen,
    NIR_PASS_V(s, nir_lower_var_copies);
 }
 
-/* Scalarizes all 64-bit ALU ops. Note that we only actually need to
- * scalarize vec3/vec4s, should probably fix that.
- */
-static bool
-scalarize_64bit(const nir_instr *instr, const void *data)
-{
-   const nir_alu_instr *alu = nir_instr_as_alu(instr);
-
-   return (alu->def.bit_size == 64 ||
-           nir_src_bit_size(alu->src[0].src) == 64);
-}
-
-static bool
-nir_to_rc_lower_64bit_intrinsic(nir_builder *b, nir_intrinsic_instr *instr)
-{
-   b->cursor = nir_after_instr(&instr->instr);
-
-   switch (instr->intrinsic) {
-   case nir_intrinsic_load_ubo:
-   case nir_intrinsic_load_ubo_vec4:
-   case nir_intrinsic_load_ssbo:
-   case nir_intrinsic_load_input:
-   case nir_intrinsic_load_interpolated_input:
-   case nir_intrinsic_load_per_vertex_input:
-   case nir_intrinsic_store_output:
-   case nir_intrinsic_store_per_vertex_output:
-   case nir_intrinsic_store_ssbo:
-      break;
-   default:
-      return false;
-   }
-
-   if (instr->num_components <= 2)
-      return false;
-
-   bool has_dest = nir_intrinsic_infos[instr->intrinsic].has_dest;
-   if (has_dest) {
-      if (instr->def.bit_size != 64)
-         return false;
-   } else {
-      if (nir_src_bit_size(instr->src[0]) != 64)
-         return false;
-   }
-
-   nir_intrinsic_instr *first =
-      nir_instr_as_intrinsic(nir_instr_clone(b->shader, &instr->instr));
-   nir_intrinsic_instr *second =
-      nir_instr_as_intrinsic(nir_instr_clone(b->shader, &instr->instr));
-
-   switch (instr->intrinsic) {
-   case nir_intrinsic_load_ubo:
-   case nir_intrinsic_load_ubo_vec4:
-   case nir_intrinsic_load_ssbo:
-   case nir_intrinsic_store_ssbo:
-      break;
-
-   default: {
-      nir_io_semantics semantics = nir_intrinsic_io_semantics(second);
-      semantics.location++;
-      semantics.num_slots--;
-      nir_intrinsic_set_io_semantics(second, semantics);
-
-      nir_intrinsic_set_base(second, nir_intrinsic_base(second) + 1);
-      break;
-   }
-   }
-
-   first->num_components = 2;
-   second->num_components -= 2;
-   if (has_dest) {
-      first->def.num_components = 2;
-      second->def.num_components -= 2;
-   }
-
-   nir_builder_instr_insert(b, &first->instr);
-   nir_builder_instr_insert(b, &second->instr);
-
-   if (has_dest) {
-      /* Merge the two loads' results back into a vector. */
-      nir_scalar channels[4] = {
-         nir_get_scalar(&first->def, 0),
-         nir_get_scalar(&first->def, 1),
-         nir_get_scalar(&second->def, 0),
-         nir_get_scalar(&second->def, second->num_components > 1 ? 1 : 0),
-      };
-      nir_def *new = nir_vec_scalars(b, channels, instr->num_components);
-      nir_def_rewrite_uses(&instr->def, new);
-   } else {
-      /* Split the src value across the two stores. */
-      b->cursor = nir_before_instr(&instr->instr);
-
-      nir_def *src0 = instr->src[0].ssa;
-      nir_scalar channels[4] = { 0 };
-      for (int i = 0; i < instr->num_components; i++)
-         channels[i] = nir_get_scalar(src0, i);
-
-      nir_intrinsic_set_write_mask(first, nir_intrinsic_write_mask(instr) & 3);
-      nir_intrinsic_set_write_mask(second, nir_intrinsic_write_mask(instr) >> 2);
-
-      nir_src_rewrite(&first->src[0], nir_vec_scalars(b, channels, 2));
-      nir_src_rewrite(&second->src[0],
-                      nir_vec_scalars(b, &channels[2], second->num_components));
-   }
-
-   int offset_src = -1;
-   uint32_t offset_amount = 16;
-
-   switch (instr->intrinsic) {
-   case nir_intrinsic_load_ssbo:
-   case nir_intrinsic_load_ubo:
-      offset_src = 1;
-      break;
-   case nir_intrinsic_load_ubo_vec4:
-      offset_src = 1;
-      offset_amount = 1;
-      break;
-   case nir_intrinsic_store_ssbo:
-      offset_src = 2;
-      break;
-   default:
-      break;
-   }
-   if (offset_src != -1) {
-      b->cursor = nir_before_instr(&second->instr);
-      nir_def *second_offset =
-         nir_iadd_imm(b, second->src[offset_src].ssa, offset_amount);
-      nir_src_rewrite(&second->src[offset_src], second_offset);
-   }
-
-   /* DCE stores we generated with no writemask (nothing else does this
-    * currently).
-    */
-   if (!has_dest) {
-      if (nir_intrinsic_write_mask(first) == 0)
-         nir_instr_remove(&first->instr);
-      if (nir_intrinsic_write_mask(second) == 0)
-         nir_instr_remove(&second->instr);
-   }
-
-   nir_instr_remove(&instr->instr);
-
-   return true;
-}
-
-static bool
-nir_to_rc_lower_64bit_load_const(nir_builder *b, nir_load_const_instr *instr)
-{
-   int num_components = instr->def.num_components;
-
-   if (instr->def.bit_size != 64 || num_components <= 2)
-      return false;
-
-   b->cursor = nir_before_instr(&instr->instr);
-
-   nir_load_const_instr *first =
-      nir_load_const_instr_create(b->shader, 2, 64);
-   nir_load_const_instr *second =
-      nir_load_const_instr_create(b->shader, num_components - 2, 64);
-
-   first->value[0] = instr->value[0];
-   first->value[1] = instr->value[1];
-   second->value[0] = instr->value[2];
-   if (num_components == 4)
-      second->value[1] = instr->value[3];
-
-   nir_builder_instr_insert(b, &first->instr);
-   nir_builder_instr_insert(b, &second->instr);
-
-   nir_def *channels[4] = {
-      nir_channel(b, &first->def, 0),
-      nir_channel(b, &first->def, 1),
-      nir_channel(b, &second->def, 0),
-      num_components == 4 ? nir_channel(b, &second->def, 1) : NULL,
-   };
-   nir_def *new = nir_vec(b, channels, num_components);
-   nir_def_rewrite_uses(&instr->def, new);
-   nir_instr_remove(&instr->instr);
-
-   return true;
-}
-
-static bool
-nir_to_rc_lower_64bit_to_vec2_instr(nir_builder *b, nir_instr *instr,
-                                    void *data)
-{
-   switch (instr->type) {
-   case nir_instr_type_load_const:
-      return nir_to_rc_lower_64bit_load_const(b, nir_instr_as_load_const(instr));
-
-   case nir_instr_type_intrinsic:
-      return nir_to_rc_lower_64bit_intrinsic(b, nir_instr_as_intrinsic(instr));
-   default:
-      return false;
-   }
-}
-
-static bool
-nir_to_rc_lower_64bit_to_vec2(nir_shader *s)
-{
-   return nir_shader_instructions_pass(s,
-                                       nir_to_rc_lower_64bit_to_vec2_instr,
-                                       nir_metadata_block_index |
-                                       nir_metadata_dominance,
-                                       NULL);
-}
-
 struct ntr_lower_tex_state {
    nir_scalar channels[8];
    unsigned i;
@@ -2868,32 +2645,6 @@ ntr_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s,
    }
 }
 
-static bool
-ntr_lower_atomic_pre_dec_filter(const nir_instr *instr, const void *_data)
-{
-   return (instr->type == nir_instr_type_intrinsic &&
-           nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_atomic_counter_pre_dec);
-}
-
-static nir_def *
-ntr_lower_atomic_pre_dec_lower(nir_builder *b, nir_instr *instr, void *_data)
-{
-   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-
-   nir_def *old_result = &intr->def;
-   intr->intrinsic = nir_intrinsic_atomic_counter_post_dec;
-
-   return nir_iadd_imm(b, old_result, -1);
-}
-
-static bool
-ntr_lower_atomic_pre_dec(nir_shader *s)
-{
-   return nir_shader_lower_instructions(s,
-                                        ntr_lower_atomic_pre_dec_filter,
-                                        ntr_lower_atomic_pre_dec_lower, NULL);
-}
-
 /* Lowers texture projectors if we can't do them as TGSI_OPCODE_TXP. */
 static void
 nir_to_rc_lower_txp(nir_shader *s)
@@ -2933,36 +2684,6 @@ nir_to_rc_lower_txp(nir_shader *s)
    NIR_PASS_V(s, nir_lower_tex, &lower_tex_options);
 }
 
-static bool
-nir_lower_primid_sysval_to_input_filter(const nir_instr *instr, const void *_data)
-{
-   return (instr->type == nir_instr_type_intrinsic &&
-           nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_primitive_id);
-}
-
-static nir_def *
-nir_lower_primid_sysval_to_input_lower(nir_builder *b, nir_instr *instr, void *data)
-{
-   nir_variable *var = nir_get_variable_with_location(b->shader, nir_var_shader_in,
-                                                      VARYING_SLOT_PRIMITIVE_ID, glsl_uint_type());
-
-   nir_io_semantics semantics = {
-      .location = var->data.location,
-      .num_slots = 1
-   };
-   return nir_load_input(b, 1, 32, nir_imm_int(b, 0),
-                         .base = var->data.driver_location,
-                         .io_semantics = semantics);
-}
-
-static bool
-nir_lower_primid_sysval_to_input(nir_shader *s)
-{
-   return nir_shader_lower_instructions(s,
-                                        nir_lower_primid_sysval_to_input_filter,
-                                        nir_lower_primid_sysval_to_input_lower, NULL);
-}
-
 const void *
 nir_to_rc(struct nir_shader *s,
           struct pipe_screen *screen)
@@ -3009,29 +2730,12 @@ const void *nir_to_rc_options(struct nir_shader *s,
    nir_to_rc_lower_txp(s);
    NIR_PASS_V(s, nir_to_rc_lower_tex);
 
-   /* While TGSI can represent PRIMID as either an input or a system value,
-    * glsl-to-tgsi had the GS (not TCS or TES) primid as an input, and drivers
-    * depend on that.
-    */
-   if (s->info.stage == MESA_SHADER_GEOMETRY)
-      NIR_PASS_V(s, nir_lower_primid_sysval_to_input);
-
-   if (s->info.num_abos)
-      NIR_PASS_V(s, ntr_lower_atomic_pre_dec);
-
    if (!original_options->lower_uniforms_to_ubo) {
       NIR_PASS_V(s, nir_lower_uniforms_to_ubo,
                  screen->get_param(screen, PIPE_CAP_PACKED_UNIFORMS),
                  true);
    }
 
-   /* Do lowering so we can directly translate f64/i64 NIR ALU ops to TGSI --
-    * TGSI stores up to a vec2 in each slot, so to avoid a whole bunch of op
-    * duplication logic we just make it so that we only see vec2s.
-    */
-   NIR_PASS_V(s, nir_lower_alu_to_scalar, scalarize_64bit, NULL);
-   NIR_PASS_V(s, nir_to_rc_lower_64bit_to_vec2);
-
    if (!screen->get_param(screen, PIPE_CAP_LOAD_CONSTBUF))
       NIR_PASS_V(s, nir_lower_ubo_vec4);