diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index ba307ee6fc7..333d7695ee7 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -921,8 +921,8 @@ static void update_cache_sha1_cpu(struct mesa_sha1 *ctx) * Don't need the cpu cache affinity stuff. The rest * is contained in first 5 dwords. */ - STATIC_ASSERT(offsetof(struct util_cpu_caps_t, num_L3_caches) == 6 * sizeof(uint32_t)); - _mesa_sha1_update(ctx, cpu_caps, 6 * sizeof(uint32_t)); + STATIC_ASSERT(offsetof(struct util_cpu_caps_t, num_L3_caches) == 5 * sizeof(uint32_t)); + _mesa_sha1_update(ctx, cpu_caps, 5 * sizeof(uint32_t)); } static void lp_disk_cache_create(struct llvmpipe_screen *screen) diff --git a/src/util/u_cpu_detect.c b/src/util/u_cpu_detect.c index 13405a6e358..564c5e4d001 100644 --- a/src/util/u_cpu_detect.c +++ b/src/util/u_cpu_detect.c @@ -92,9 +92,15 @@ DEBUG_GET_ONCE_BOOL_OPTION(dump_cpu, "GALLIUM_DUMP_CPU", false) -/* Do not try to access util_cpu_caps directly, call to util_get_cpu_caps instead */ +static struct util_cpu_caps_t util_cpu_caps; +/* Do not access _util_cpu_caps_state directly; call util_get_cpu_caps() instead. */ +struct _util_cpu_caps_state_t _util_cpu_caps_state = { + .once_flag = ONCE_FLAG_INIT, + .detect_done = 0, +}; + #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) static int has_cpuid(void); #endif @@ -584,8 +590,10 @@ get_cpu_topology(void) #endif } -static void -util_cpu_detect_once(void) +void _util_cpu_detect_once(void); + +void +_util_cpu_detect_once(void) { int available_cpus = 0; int total_cpus = 0; @@ -912,18 +920,8 @@ util_cpu_detect_once(void) printf("util_cpu_caps.num_L3_caches = %u\n", util_cpu_caps.num_L3_caches); printf("util_cpu_caps.num_cpu_mask_bits = %u\n", util_cpu_caps.num_cpu_mask_bits); } + _util_cpu_caps_state.caps = util_cpu_caps; /* This must happen at the end as it's used to guard everything else */ - 
p_atomic_set(&util_cpu_caps.detect_done, 1); -} - -static once_flag cpu_once_flag = ONCE_FLAG_INIT; - -void _util_cpu_detect_local(void); - -/* Do not call to this function directly, using util_get_cpu_caps instead */ -void -_util_cpu_detect_local(void) -{ - call_once(&cpu_once_flag, util_cpu_detect_once); + p_atomic_set(&_util_cpu_caps_state.detect_done, 1); } diff --git a/src/util/u_cpu_detect.h b/src/util/u_cpu_detect.h index 646b5d0c470..974e35dc806 100644 --- a/src/util/u_cpu_detect.h +++ b/src/util/u_cpu_detect.h @@ -61,12 +61,6 @@ enum cpu_family { typedef uint32_t util_affinity_mask[UTIL_MAX_CPUS / 32]; struct util_cpu_caps_t { - /** - * Initialized to 0 and set to non-zero with an atomic after the entire - * struct has been initialized. - */ - uint32_t detect_done; - /** * Number of CPUs available to the process. * @@ -132,13 +126,23 @@ struct util_cpu_caps_t { util_affinity_mask *L3_affinity_mask; }; +struct _util_cpu_caps_state_t { + once_flag once_flag; + /** + * Initialized to 0 and set to non-zero with an atomic after the entire + * caps struct has been initialized. + */ + uint32_t detect_done; + struct util_cpu_caps_t caps; +}; + #define U_CPU_INVALID_L3 0xffff static inline ATTRIBUTE_CONST const struct util_cpu_caps_t * util_get_cpu_caps(void) { - extern void _util_cpu_detect_local(void); - extern struct util_cpu_caps_t util_cpu_caps; + extern void _util_cpu_detect_once(void); + extern struct _util_cpu_caps_state_t _util_cpu_caps_state; /* On most CPU architectures, an atomic read is simply a regular memory * load instruction with some extra compiler magic to prevent code @@ -163,10 +167,10 @@ util_get_cpu_caps(void) * sure, but that state is such that it appears to return exactly the same * value with the same internal data every time. 
*/ - if (unlikely(!p_atomic_read(&util_cpu_caps.detect_done))) - _util_cpu_detect_local(); + if (unlikely(!p_atomic_read(&_util_cpu_caps_state.detect_done))) + call_once(&_util_cpu_caps_state.once_flag, _util_cpu_detect_once); - return &util_cpu_caps; + return &_util_cpu_caps_state.caps; } #ifdef __cplusplus