diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index ba307ee6fc7..333d7695ee7 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -921,8 +921,8 @@ static void update_cache_sha1_cpu(struct mesa_sha1 *ctx)
     * Don't need the cpu cache affinity stuff. The rest
     * is contained in first 5 dwords.
     */
-   STATIC_ASSERT(offsetof(struct util_cpu_caps_t, num_L3_caches) == 6 * sizeof(uint32_t));
-   _mesa_sha1_update(ctx, cpu_caps, 6 * sizeof(uint32_t));
+   STATIC_ASSERT(offsetof(struct util_cpu_caps_t, num_L3_caches) == 5 * sizeof(uint32_t));
+   _mesa_sha1_update(ctx, cpu_caps, 5 * sizeof(uint32_t));
 }
 
 static void lp_disk_cache_create(struct llvmpipe_screen *screen)
diff --git a/src/util/u_cpu_detect.c b/src/util/u_cpu_detect.c
index 13405a6e358..564c5e4d001 100644
--- a/src/util/u_cpu_detect.c
+++ b/src/util/u_cpu_detect.c
@@ -92,9 +92,15 @@
 
 DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", false)
 
-/* Do not try to access util_cpu_caps directly, call to util_get_cpu_caps instead */
+static
 struct util_cpu_caps_t util_cpu_caps;
 
+/* Do not access _util_cpu_caps_state directly; call util_get_cpu_caps() instead. */
+struct _util_cpu_caps_state_t _util_cpu_caps_state = {
+   .once_flag = ONCE_FLAG_INIT,
+   .detect_done = 0,
+};
+
 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
 static int has_cpuid(void);
 #endif
@@ -584,8 +590,10 @@ get_cpu_topology(void)
 #endif
 }
 
-static void
-util_cpu_detect_once(void)
+void _util_cpu_detect_once(void);
+
+void
+_util_cpu_detect_once(void)
 {
    int available_cpus = 0;
    int total_cpus = 0;
@@ -912,18 +920,8 @@ util_cpu_detect_once(void)
       printf("util_cpu_caps.num_L3_caches = %u\n", util_cpu_caps.num_L3_caches);
       printf("util_cpu_caps.num_cpu_mask_bits = %u\n", util_cpu_caps.num_cpu_mask_bits);
    }
+   _util_cpu_caps_state.caps = util_cpu_caps;
 
    /* This must happen at the end as it's used to guard everything else */
-   p_atomic_set(&util_cpu_caps.detect_done, 1);
-}
-
-static once_flag cpu_once_flag = ONCE_FLAG_INIT;
-
-void _util_cpu_detect_local(void);
-
-/* Do not call to this function directly, using util_get_cpu_caps instead */
-void
-_util_cpu_detect_local(void)
-{
-   call_once(&cpu_once_flag, util_cpu_detect_once);
+   p_atomic_set(&_util_cpu_caps_state.detect_done, 1);
 }
diff --git a/src/util/u_cpu_detect.h b/src/util/u_cpu_detect.h
index 646b5d0c470..974e35dc806 100644
--- a/src/util/u_cpu_detect.h
+++ b/src/util/u_cpu_detect.h
@@ -61,12 +61,6 @@ enum cpu_family {
 typedef uint32_t util_affinity_mask[UTIL_MAX_CPUS / 32];
 
 struct util_cpu_caps_t {
-   /**
-    * Initialized to 0 and set to non-zero with an atomic after the entire
-    * struct has been initialized.
-    */
-   uint32_t detect_done;
-
    /**
     * Number of CPUs available to the process.
     *
@@ -132,13 +126,23 @@ struct util_cpu_caps_t {
    util_affinity_mask *L3_affinity_mask;
 };
 
+struct _util_cpu_caps_state_t {
+   once_flag once_flag; /* handed to call_once() by util_get_cpu_caps() */
+   /**
+    * Initialized to 0 and set to non-zero with an atomic only after `caps`
+    * below has been completely filled in by _util_cpu_detect_once().
+    */
+   uint32_t detect_done;
+   struct util_cpu_caps_t caps; /* util_get_cpu_caps() returns a pointer to this */
+};
+
 #define U_CPU_INVALID_L3 0xffff
 
 static inline ATTRIBUTE_CONST const struct util_cpu_caps_t *
 util_get_cpu_caps(void)
 {
-   extern void _util_cpu_detect_local(void);
-   extern struct util_cpu_caps_t util_cpu_caps;
+   extern void _util_cpu_detect_once(void);
+   extern struct _util_cpu_caps_state_t _util_cpu_caps_state;
 
    /* On most CPU architectures, an atomic read is simply a regular memory
     * load instruction with some extra compiler magic to prevent code
@@ -163,10 +167,10 @@ util_get_cpu_caps(void)
     * sure, but that state is such that it appears to return exactly the same
     * value with the same internal data every time.
     */
-   if (unlikely(!p_atomic_read(&util_cpu_caps.detect_done)))
-      _util_cpu_detect_local();
+   if (unlikely(!p_atomic_read(&_util_cpu_caps_state.detect_done)))
+      call_once(&_util_cpu_caps_state.once_flag, _util_cpu_detect_once);
 
-   return &util_cpu_caps;
+   return &_util_cpu_caps_state.caps;
 }
 
 #ifdef __cplusplus