intel: Add executor tool

Add a tool that programs the hardware the minimum amount to be able to execute compute shaders and then executes a script that can perform data manipulation and dispatch execution of the shaders (written in Xe assembly). The goal is to have a tool to experiment directly with certain assembly instructions and the shared units without having to instrument the drivers. To make it more convenient to write assembly, a few macros (indicated by the @-symbol) will be processed into the full instruction. For example, the script ``` local r = execute { data={ [42] = 0x100 }, src=[[ @mov g1 42 @read g2 g1 @id g3 add(8) g4<1>UD g2<8,8,1>UD g3<8,8,1>UD { align1 @1 1Q }; @write g3 g4 @eot ]] } dump(r, 4) ``` produces ``` [0x00000000] 0x00000100 0x00000101 0x00000102 0x00000103 ``` There's a help message inside the code that describes the script environment and the macros for assembly sources. Acked-by: Ian Romanick <ian.d.romanick@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30062>
2024-07-06 21:44:45 -07:00
parent 6267585778
commit e72bf2d02f
12 changed files with 1729 additions and 0 deletions
@@ -0,0 +1,41 @@
+-- BFI seems to be available on Gfx9 as well, but the emission code needs to
+-- be fixed before that version can be added to the list below.
+-- NOTE(review): check_verx10 is supplied by the executor script environment;
+-- presumably it restricts this script to the listed verx10 values -- confirm.
+check_verx10(110, 120, 125, 200)
+
+-- Reference computation for the bfi1/bfi2 sequence run by BFI(); its result
+-- is what this script prints as the "expected" value.
+--
+--   a  field width  (only the low 5 bits are used)
+--   b  field offset (only the low 5 bits are used)
+--   c  value whose low bits are inserted into the field
+--   d  value that supplies every bit outside the field
+--
+-- Returns the combined value, truncated to 32 bits.
+function BFI_simulation(a, b, c, d)
+  local width  = a & 0x1F
+  local offset = b & 0x1F
+  -- Lua integers are 64-bit while the shader works on 32-bit UD registers.
+  -- Without the truncation, width + offset > 32 would leave mask bits (and
+  -- therefore result bits) above bit 31.
+  local mask   = (((1 << width) - 1) << offset) & 0xFFFFFFFF
+  return (((c << offset) & mask) | (d & ~mask)) & 0xFFFFFFFF
+end
+
+-- Runs a bfi1/bfi2 sequence through execute() and returns element 0 of the
+-- result.  The arguments a, b, c, d are passed as data elements 0..3; the
+-- shader reads them into g1..g4, feeds g1/g2 to bfi1 and the bfi1 result
+-- together with g3/g4 to bfi2, then stores the bfi2 result with @write.
+function BFI(a, b, c, d)
+  local inputs = { [0] = a, b, c, d }
+
+  local output = execute {
+    data = inputs,
+    src = [[
+      @id   g9
+      @mov  g11  0
+      @mov  g12  1
+      @mov  g13  2
+      @mov  g14  3
+
+      @read g1 g11
+      @read g2 g12
+      @read g3 g13
+      @read g4 g14
+
+      bfi1(8)  g5<1>UD  g1<8,8,1>UD  g2<8,8,1>UD               { align1 @1 1Q };
+      bfi2(8)  g6<1>UD  g5<8,8,1>UD  g3<8,8,1>UD  g4<8,8,1>UD  { align1 @1 1Q };
+
+      @write g9 g6
+      @eot
+    ]],
+  }
+
+  return output[0]
+end
+
+-- Formats v as "0x" followed by lowercase hexadecimal digits, zero-padded to
+-- at least eight digits.
+function Hex(v)
+  return ("0x%08x"):format(v)
+end
+
+-- Run the shader and the Lua reference computation on the same inputs and
+-- print both results so they can be compared.
+local width, offset, insert, base = 12, 12, 0xAAAAAAAA, 0xBBBBBBBB
+
+local calculated = BFI(width, offset, insert, base)
+print("calculated", Hex(calculated))
+
+local expected = BFI_simulation(width, offset, insert, base)
+print("expected",   Hex(expected))
@@ -0,0 +1,41 @@
+--[[
+
+Execute the example from the Dot Product 4 Accumulate
+instruction as seen in the PRM.
+
+    mov (1) r1.0:d 0x0102037F:d
+    // (char4)(0x1,0x2,0x3,0x7F)
+    mov (1) r2.0:d 50:d
+    dp4a (1) r3.0:d r2:d r1:d r1:d
+    // r3.0 = 50 + (0x1*0x1 + 0x2*0x2 + 0x3*0x3 + 0x7F*0x7F)
+    // = 50 + (1 + 4 + 9 + 16129)
+    // = 16193
+
+--]]
+
+-- NOTE(review): check_ver is supplied by the executor script environment;
+-- presumably it gates this script on hardware version 12 (whether that means
+-- exactly 12 or 12 and later is not visible here) -- confirm.
+check_ver(12)
+
+-- Reference computation for the dp4a example: adds the products of the first
+-- four element pairs of a and b (1-based tables) to the accumulator c.
+function DP4A(a, b, c)
+  -- Left-to-right evaluation keeps the same accumulation order as a loop
+  -- over i = 1..4.
+  return c + a[1] * b[1] + a[2] * b[2] + a[3] * b[3] + a[4] * b[4]
+end
+
+-- Shader for the dp4a example: g1 holds the packed bytes 0x01, 0x02, 0x03,
+-- 0x7F, g2 holds the value 50, and the dp4a result in g3 is stored with
+-- @write.
+local shader = [[
+    @id   g9
+
+    @mov  g1  0x0102037F
+    @mov  g2  50
+
+    dp4a(8)  g3<1>UD  g2<8,8,1>UD  g1<8,8,1>UD  g1<8,8,1>UD  { align1 @1 1Q };
+
+    @write g9 g3
+    @eot
+  ]]
+
+local output = execute { src = shader }
+
+-- Print the Lua reference value first, then element 0 of the shader output.
+local bytes = {1,2,3,0x7F}
+print("expected", DP4A(bytes, {1,2,3,0x7F}, 50))
+print("calculated", output[0])
@@ -0,0 +1,18 @@
+-- Example from the help message.
+
+-- The shader reads the data element at index 42, adds the value produced by
+-- the @id macro to it, and stores the sum with @write.
+local shader = [[
+    @mov     g1      42
+    @read    g2      g1
+
+    @id      g3
+
+    add(8)   g4<1>UD  g2<8,8,1>UD  g3<8,8,1>UD  { align1 @1 1Q };
+
+    @write   g3       g4
+    @eot
+  ]]
+
+local result = execute {
+  data = { [42] = 0x100 },
+  src = shader,
+}
+
+dump(result, 4)
@@ -0,0 +1,6 @@
+-- Smallest script in this set: the shader is a single nop followed by the
+-- @eot macro, and the value returned by execute() is not used.
+local shader = [[
+    nop;
+    @eot
+  ]]
+
+execute { src = shader }
@@ -0,0 +1,20 @@
+-- Input data: elements 0..7 hold 0, 4, 8, ..., 28.
+local input = {}
+for index = 0, 7 do
+  input[index] = index * 4
+end
+
+-- The shader reads a data element using the value from @id, adds 0x100 to
+-- it, and stores the sum back with @write using the same @id value.
+local shader = [[
+    @id    g1
+    @read  g3 g1
+
+    add(8) g3<1>UD  g3<8,8,1>UD  0x100UD  { align1 1Q };
+
+    @write g1 g3
+
+    @eot
+  ]]
+
+local output = execute { data = input, src = shader }
+
+dump(output, 8)