aco/gfx11: allow true 16-bit instructions to access v128+

It looks like the LLVM assembler promotes true 16-bit instructions to VOP3 when they access v128 or above, so do the same when assembling in ACO.

No fossil-db changes.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20251>
2022-11-11 19:58:45 +00:00
parent 254b178d5b
commit 6872f8d861
4 changed files with 286 additions and 21 deletions
@@ -98,8 +98,30 @@ reg(asm_context& ctx, Definition def, unsigned width = 32)
   return reg(ctx, def.physReg()) & BITFIELD_MASK(width);
 }

+bool /* Reports whether instr has to be emitted with the VOP3 encoding instead of its native VOP1/VOP2/VOPC one. */
+needs_vop3_gfx11(asm_context& ctx, Instruction* instr, Operand *dpp_op)
+{ /* dpp_op may be NULL; the DPP callers pass the original operands[0], which they have already replaced with a placeholder register. */
+   if (ctx.gfx_level <= GFX10_3) /* only levels after GFX10_3 can ever return true */
+      return false;
+
+   uint8_t mask = get_gfx11_true16_mask(instr->opcode); /* NOTE(review): presumably flags which operands/definition are true 16-bit - confirm against the helper */
+   if (!mask) /* opcode has no flagged operands or definition: nothing to check */
+      return false;
+
+   u_foreach_bit (i, mask & 0x3) { /* bits 0 and 1 of the mask select operands[0] and operands[1] */
+      if (i == 0 && dpp_op && dpp_op->physReg().reg() >= (256 + 128)) /* for operand 0, also test the separately supplied DPP operand */
+         return true;
+      if (instr->operands[i].physReg().reg() >= (256 + 128)) /* NOTE(review): 256 + 128 presumably corresponds to v128 (VGPR numbering assumed to start at 256) - confirm */
+         return true;
+   }
+   if ((mask & 0x8) && instr->definitions[0].physReg().reg() >= (256 + 128)) /* bit 3 of the mask selects definitions[0] */
+      return true;
+   return false; /* no flagged operand/definition is at or above the threshold */
+}
+
 void
-emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* instr)
+emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* instr,
+                 Operand *dpp_op_ptr = NULL, DPP16_instruction *dpp16_ptr = NULL)
 {
   /* lower remaining pseudo-instructions */
   if (instr->opcode == aco_opcode::p_constaddr_getpc) {
@@ -298,30 +320,80 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
      return;
   }
   case Format::VOP2: {
-      uint32_t encoding = 0;
-      encoding |= opcode << 25;
-      encoding |= reg(ctx, instr->definitions[0], 8) << 17;
-      encoding |= reg(ctx, instr->operands[1], 8) << 9;
-      encoding |= reg(ctx, instr->operands[0]);
-      out.push_back(encoding);
+      if (needs_vop3_gfx11(ctx, instr, dpp_op_ptr)) {
+         if (instr->opcode == aco_opcode::v_fmaak_f16) {
+            opcode = ctx.opcode[(int)aco_opcode::v_fma_f16];
+         } else if (instr->opcode == aco_opcode::v_fmamk_f16) {
+            std::swap(instr->operands[1], instr->operands[2]);
+            opcode = ctx.opcode[(int)aco_opcode::v_fma_f16];
+         } else {
+            opcode += 0x100;
+         }
+
+         uint32_t encoding = (0b110101 << 26);
+         encoding |= opcode << 16;
+         encoding |= reg(ctx, instr->definitions[0], 8);
+         encoding |= dpp16_ptr ? (dpp16_ptr->abs[0] << 8) | (dpp16_ptr->abs[1] << 9) : 0;
+         out.push_back(encoding);
+
+         encoding = reg(ctx, instr->operands[0]);
+         encoding |= reg(ctx, instr->operands[1]) << 9;
+         if (instr->opcode == aco_opcode::v_fmaak_f16 ||
+             instr->opcode == aco_opcode::v_fmamk_f16)
+            encoding |= reg(ctx, instr->operands[2]) << 18;
+         encoding |= dpp16_ptr ? (dpp16_ptr->neg[0] << 29) | (dpp16_ptr->neg[1] << 30) : 0;
+         out.push_back(encoding);
+      } else {
+         uint32_t encoding = 0;
+         encoding |= opcode << 25;
+         encoding |= reg(ctx, instr->definitions[0], 8) << 17;
+         encoding |= reg(ctx, instr->operands[1], 8) << 9;
+         encoding |= reg(ctx, instr->operands[0]);
+         out.push_back(encoding);
+      }
      break;
   }
   case Format::VOP1: {
-      uint32_t encoding = (0b0111111 << 25);
-      if (!instr->definitions.empty())
-         encoding |= reg(ctx, instr->definitions[0], 8) << 17;
-      encoding |= opcode << 9;
-      if (!instr->operands.empty())
-         encoding |= reg(ctx, instr->operands[0]);
-      out.push_back(encoding);
+      if (needs_vop3_gfx11(ctx, instr, dpp_op_ptr)) {
+         uint32_t encoding = (0b110101 << 26);
+         encoding |= (opcode + 0x180) << 16;
+         encoding |= reg(ctx, instr->definitions[0], 8);
+         encoding |= dpp16_ptr ? dpp16_ptr->abs[0] << 8 : 0;
+         out.push_back(encoding);
+
+         encoding = reg(ctx, instr->operands[0]);
+         encoding |= dpp16_ptr ? dpp16_ptr->neg[0] << 29 : 0;
+         out.push_back(encoding);
+      } else {
+         uint32_t encoding = (0b0111111 << 25);
+         if (!instr->definitions.empty())
+            encoding |= reg(ctx, instr->definitions[0], 8) << 17;
+         encoding |= opcode << 9;
+         if (!instr->operands.empty())
+            encoding |= reg(ctx, instr->operands[0]);
+         out.push_back(encoding);
+      }
      break;
   }
   case Format::VOPC: {
-      uint32_t encoding = (0b0111110 << 25);
-      encoding |= opcode << 17;
-      encoding |= reg(ctx, instr->operands[1], 8) << 9;
-      encoding |= reg(ctx, instr->operands[0]);
-      out.push_back(encoding);
+      if (needs_vop3_gfx11(ctx, instr, dpp_op_ptr)) {
+         uint32_t encoding = (0b110101 << 26);
+         encoding |= opcode << 16;
+         encoding |= reg(ctx, instr->definitions[0], 8);
+         encoding |= dpp16_ptr ? (dpp16_ptr->abs[0] << 8) | (dpp16_ptr->abs[1] << 9) : 0;
+         out.push_back(encoding);
+
+         encoding = reg(ctx, instr->operands[0]);
+         encoding |= reg(ctx, instr->operands[1]) << 9;
+         encoding |= dpp16_ptr ? (dpp16_ptr->neg[0] << 29) | (dpp16_ptr->neg[1] << 30) : 0;
+         out.push_back(encoding);
+      } else {
+         uint32_t encoding = (0b0111110 << 25);
+         encoding |= opcode << 17;
+         encoding |= reg(ctx, instr->operands[1], 8) << 9;
+         encoding |= reg(ctx, instr->operands[0]);
+         out.push_back(encoding);
+      }
      break;
   }
   case Format::VINTRP: {
@@ -802,7 +874,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
         Operand dpp_op = instr->operands[0];
         instr->operands[0] = Operand(PhysReg{250}, v1);
         instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP16);
-         emit_instruction(ctx, out, instr);
+         emit_instruction(ctx, out, instr, &dpp_op, &dpp);
         uint32_t encoding = (0xF & dpp.row_mask) << 28;
         encoding |= (0xF & dpp.bank_mask) << 24;
         encoding |= dpp.abs[1] << 23;
@@ -824,7 +896,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
         Operand dpp_op = instr->operands[0];
         instr->operands[0] = Operand(PhysReg{234}, v1);
         instr->format = (Format)((uint16_t)instr->format & ~(uint16_t)Format::DPP8);
-         emit_instruction(ctx, out, instr);
+         emit_instruction(ctx, out, instr, &dpp_op);
         uint32_t encoding = reg(ctx, dpp_op, 8);
         for (unsigned i = 0; i < 8; ++i)
            encoding |= dpp.lane_sel[i] << (8 + i * 3);