From 10e62cbe01e6bfda53d183ba7c3f8cefb8665d79 Mon Sep 17 00:00:00 2001
From: Mary Guillemard <mary.guillemard@collabora.com>
Date: Mon, 24 Jun 2024 08:52:54 +0200
Subject: [PATCH] panvk: Report proper workgroup invocation and size

We cannot report a workgroup invocation and size bigger than
MAX_THREADS_PER_WG as splitting into several jobs has many limitations
that cannot be overlooked.

As such we limit to the MAX_THREADS_PER_WG property reported by kmod.

Fix "dEQP-VK.compute.pipeline.basic.max_local_size_*" failures.

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29872>
---
 src/panfrost/vulkan/panvk_physical_device.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/panfrost/vulkan/panvk_physical_device.c b/src/panfrost/vulkan/panvk_physical_device.c
index b868bebc39c..69a502a419d 100644
--- a/src/panfrost/vulkan/panvk_physical_device.c
+++ b/src/panfrost/vulkan/panvk_physical_device.c
@@ -243,6 +243,11 @@ get_device_properties(const struct panvk_instance *instance,
    uint64_t os_page_size = 4096;
    os_get_page_size(&os_page_size);
 
+   ASSERTED unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);
+
+   /* Ensure that the max threads count per workgroup is valid for Bifrost */
+   assert(arch > 8 || device->kmod.props.max_threads_per_wg <= 1024);
+
    *properties = (struct vk_properties){
       .apiVersion = panvk_get_vk_version(),
       .driverVersion = vk_get_driver_version(),
@@ -383,11 +388,14 @@ get_device_properties(const struct panvk_instance *instance,
        * dispatch in several jobs if it's too big.
        */
       .maxComputeWorkGroupCount = {65535, 65535, 65535},
-      /* We have 10 bits to encode the local-size, and there's a minus(1)
-       * modifier, so, a size of 1 takes no bit.
+
+      /* We could also split into several jobs but this has many limitations.
+       * As such we limit to the max threads per workgroup supported by the GPU.
        */
-      .maxComputeWorkGroupInvocations = 1 << 10,
-      .maxComputeWorkGroupSize = {1 << 10, 1 << 10, 1 << 10},
+      .maxComputeWorkGroupInvocations = device->kmod.props.max_threads_per_wg,
+      .maxComputeWorkGroupSize = {device->kmod.props.max_threads_per_wg,
+                                  device->kmod.props.max_threads_per_wg,
+                                  device->kmod.props.max_threads_per_wg},
       /* 8-bit subpixel precision. */
       .subPixelPrecisionBits = 8,
       .subTexelPrecisionBits = 8,