2f6919e6c2
On MTL ChromeOS boards, during AI based video conference, we were observing a lot of overhead from invalidations. Upon debug, it was found that we were using clflush in this function and that isn't efficient. With this change, while executing compute workloads like zoo models, we are getting ~25% performance improvements in a best case scenario. Rework: * Jordan: Call intel_clflushopt_range() rather than __builtin_ia32_clflushopt() because intel_mem.c is not compiled with -mclflushopt. Backport-to: 24.1 24.2 Signed-off-by: Sushma Venkatesh Reddy <sushma.venkatesh.reddy@intel.com> Reviewed-by: Matt Turner <mattst88@gmail.com> Reviewed-by: Jordan Justen <jordan.l.justen@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30238>
106 lines
3.3 KiB
C
106 lines
3.3 KiB
C
/*
|
|
* Copyright © 2017 Intel Corporation
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include "intel_mem.h"
|
|
#include "util/u_cpu_detect.h"
|
|
|
|
#include <stdint.h>
|
|
|
|
#define CACHELINE_SIZE 64
|
|
#define CACHELINE_MASK 63
|
|
|
|
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
|
|
|
|
#ifdef HAVE___BUILTIN_IA32_CLFLUSHOPT
|
|
void intel_clflushopt_range(void *start, size_t size);
|
|
#endif
|
|
|
|
static void
|
|
intel_clflush_range(void *start, size_t size)
|
|
{
|
|
void *p = (void *) (((uintptr_t) start) & ~CACHELINE_MASK);
|
|
void *end = start + size;
|
|
|
|
while (p < end) {
|
|
__builtin_ia32_clflush(p);
|
|
p += CACHELINE_SIZE;
|
|
}
|
|
}
|
|
|
|
void
|
|
intel_flush_range_no_fence(void *start, size_t size)
|
|
{
|
|
#ifdef HAVE___BUILTIN_IA32_CLFLUSHOPT
|
|
const struct util_cpu_caps_t *cpu_caps = util_get_cpu_caps();
|
|
if (cpu_caps->has_clflushopt) {
|
|
intel_clflushopt_range(start, size);
|
|
return;
|
|
}
|
|
#endif
|
|
intel_clflush_range(start, size);
|
|
}
|
|
|
|
void
|
|
intel_flush_range(void *start, size_t size)
|
|
{
|
|
__builtin_ia32_mfence();
|
|
intel_flush_range_no_fence(start, size);
|
|
#ifdef HAVE___BUILTIN_IA32_CLFLUSHOPT
|
|
/* clflushopt doesn't include an mfence like clflush */
|
|
if (util_get_cpu_caps()->has_clflushopt)
|
|
__builtin_ia32_mfence();
|
|
#endif
|
|
}
|
|
|
|
void
|
|
intel_invalidate_range(void *start, size_t size)
|
|
{
|
|
if (size == 0)
|
|
return;
|
|
|
|
intel_flush_range_no_fence(start, size);
|
|
|
|
/* Modern Atom CPUs (Baytrail+) have issues with clflush serialization,
|
|
* where mfence is not a sufficient synchronization barrier. We must
|
|
* double clflush the last cacheline. This guarantees it will be ordered
|
|
* after the preceding clflushes, and then the mfence guards against
|
|
* prefetches crossing the clflush boundary.
|
|
*
|
|
* See kernel commit 396f5d62d1a5fd99421855a08ffdef8edb43c76e
|
|
* ("drm: Restore double clflush on the last partial cacheline")
|
|
* and https://bugs.freedesktop.org/show_bug.cgi?id=92845.
|
|
*/
|
|
#ifdef HAVE___BUILTIN_IA32_CLFLUSHOPT
|
|
/* clflushopt doesn't include an mfence like clflush */
|
|
if (util_get_cpu_caps()->has_clflushopt) {
|
|
__builtin_ia32_mfence();
|
|
intel_clflushopt_range(start + size - 1, 1);
|
|
__builtin_ia32_mfence();
|
|
return;
|
|
}
|
|
#endif
|
|
__builtin_ia32_clflush(start + size - 1);
|
|
__builtin_ia32_mfence();
|
|
}
|
|
#endif /* SUPPORT_INTEL_INTEGRATED_GPUS */
|