brw: Generalize read_attribute_payload_intel to handle more cases

We were using this for indirect loads of the shader input thread
payload, but there's no reason we can't use it for constant access
too.  In this case we can just MOV from the ATTR file directly
without a special opcode that turns into MOV_INDIRECT later.

We also allow it to load multiple components now.  This is useful
for, say, returning vec4 pushed inputs.  And, we allow it in more
stages than just the fragment stage.

Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38482>
This commit is contained in:
Kenneth Graunke
2025-10-24 15:34:54 -07:00
committed by Marge Bot
parent 792762617a
commit 96d331766a
4 changed files with 27 additions and 14 deletions

View File

@@ -2423,11 +2423,9 @@ image("store_raw_intel", src_comp=[1, 0])
# Maximum number of polygons processed in the fragment shader
system_value("max_polygon_intel", 1, bit_sizes=[32])
# Read the attribute thread payload at a given offset
# Read the attribute thread payload at a given byte offset
# src[] = { offset }
intrinsic("load_attribute_payload_intel", dest_comp=1, bit_sizes=[32],
src_comp=[1],
flags=[CAN_ELIMINATE, CAN_REORDER])
load("attribute_payload_intel", [1], flags=[CAN_ELIMINATE, CAN_REORDER])
# Populate the per-primitive payload at an offset
# src[] = { value, offset }

View File

@@ -4528,14 +4528,6 @@ brw_from_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
brw_dynamic_per_primitive_remap(brw_wm_prog_data(s.prog_data)));
break;
case nir_intrinsic_load_attribute_payload_intel: {
const brw_reg offset = retype(
bld.emit_uniformize(get_nir_src(ntb, instr->src[0], 0)),
BRW_TYPE_UD);
bld.emit(SHADER_OPCODE_LOAD_ATTRIBUTE_PAYLOAD, retype(dest, BRW_TYPE_UD), offset);
break;
}
default:
brw_from_nir_emit_intrinsic(ntb, bld, instr);
break;
@@ -5943,6 +5935,29 @@ brw_from_nir_emit_intrinsic(nir_to_brw_state &ntb,
brw_from_nir_emit_memory_access(ntb, bld, xbld, instr);
break;
case nir_intrinsic_load_attribute_payload_intel: {
assert(instr->def.bit_size == 32);
if (nir_src_is_const(instr->src[0])) {
const brw_reg src = byte_offset(brw_attr_reg(0, dest.type),
nir_src_as_uint(instr->src[0]));
brw_reg comps[NIR_MAX_VEC_COMPONENTS];
for (unsigned i = 0; i < instr->num_components; i++) {
comps[i] = component(src, i);
}
bld.VEC(dest, comps, instr->num_components);
} else {
assert(instr->def.num_components == 1);
const brw_reg offset = retype(
bld.emit_uniformize(get_nir_src(ntb, instr->src[0], 0)),
BRW_TYPE_UD);
bld.emit(SHADER_OPCODE_LOAD_ATTRIBUTE_PAYLOAD,
retype(dest, BRW_TYPE_UD), offset);
}
break;
}
case nir_intrinsic_load_urb_vec4_intel: {
assert(devinfo->ver < 20);
brw_reg srcs[URB_LOGICAL_NUM_SRCS];

View File

@@ -983,7 +983,7 @@ brw_nir_lower_fs_inputs(nir_shader *nir,
nir_ieq_imm(b, index, INTEL_MSAA_FLAG_PRIMITIVE_ID_INDEX_MESH),
nir_imm_int(b, 0), per_vertex_offset);
indirect_primitive_id =
nir_load_attribute_payload_intel(b, attribute_offset);
nir_load_attribute_payload_intel(b, 1, 32, attribute_offset);
}
nir_foreach_shader_in_variable(var, nir) {

View File

@@ -603,7 +603,7 @@ brw_nir_frag_convert_attrs_prim_to_vert_indirect(struct nir_shader *nir,
brw_nir_vertex_attribute_offset(b, attr_idx, devinfo),
per_primitive_stride);
nir_def *value =
nir_load_attribute_payload_intel(b, per_vertex_offset);
nir_load_attribute_payload_intel(b, 1, 32, per_vertex_offset);
/* Write back the values into the per-primitive location */
nir_store_per_primitive_payload_intel(
b, value, .base = location, .component = c);