From 5ccb9f46322abc7141b3a972dfde845321d7da1f Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 3 Nov 2023 17:58:16 -0700 Subject: [PATCH] iris: Don't return timestamps modulo 36-bits Our actual timestamp register from the hardware is 36-bits these days. Depending on the clock base, a few of the low bits may be discarded. We scale by 52.08 or so (see intel_device_info_timebase_scale()) in order to convert clock ticks to nanoseconds. This value could take ~43-44 bits to represent. When our timestamp register overflows, the reported value would overflow at some value which isn't a power-of-two. For some reason, when we implemented ARB_timer_query for i965 in 2012, we thought applications would use GL_QUERY_COUNTER_BITS to detect and handle overflow, expecting our timer queries to overflow at a power-of- two value. We tried to hack around this by reporting a 36-bit counter, for what was then a 32-bit(?) timestamp register, with a timebase scale of 80(?)...and reported the nanoseconds modulo 2^36, with some handwaving about wrap around times being close enough. I don't think this is what anyone wants. All other Gallium drivers (except maybe zink) report 64 here, as does the Intel Windows driver. The ARB_timer_query spec defines it as: "If is QUERY_COUNTER_BITS, the implementation-dependent number of bits used to hold the query result for will be placed in . The number of query counter bits may be zero, in which case the counter contains no useful information." and it also mentions about overflow: "If the elapsed time overflows the number of bits, , available to hold elapsed time, its value becomes undefined. It is recommended, but not required, that implementations handle this overflow case by saturating at 2^n - 1." There's nothing about roll-over happening at power-of-two times, just that the value returned has to fit in that many bits, and if the value were to exceed that, it's undefined, optionally saturated to the maximum representable value. This patch makes us report 64 like other drivers, and stop taking the modulus. Technically, our roll-over will happen before we reach the number of query counter bits, which may or may not be valid. But this does let us report longer times, and should be more desirable behavior. Tested-by: Karol Herbst Part-of: --- src/gallium/drivers/iris/iris_defines.h | 3 --- src/gallium/drivers/iris/iris_query.c | 4 +--- src/gallium/drivers/iris/iris_screen.c | 4 ---- 3 files changed, 1 insertion(+), 10 deletions(-) diff --git a/src/gallium/drivers/iris/iris_defines.h b/src/gallium/drivers/iris/iris_defines.h index fce73551670..9e3e24231c7 100644 --- a/src/gallium/drivers/iris/iris_defines.h +++ b/src/gallium/drivers/iris/iris_defines.h @@ -52,9 +52,6 @@ #define CS_GPR(n) (0x2600 + (n) * 8) -/* The number of bits in our TIMESTAMP queries. */ -#define TIMESTAMP_BITS 36 - /* For gfx12 we set the streamout buffers using 4 separate commands * (3DSTATE_SO_BUFFER_INDEX_*) instead of 3DSTATE_SO_BUFFER. However the layout * of the 3DSTATE_SO_BUFFER_INDEX_* commands is identical to that of diff --git a/src/gallium/drivers/iris/iris_query.c b/src/gallium/drivers/iris/iris_query.c index 6ea9c8917e6..6f724932cf4 100644 --- a/src/gallium/drivers/iris/iris_query.c +++ b/src/gallium/drivers/iris/iris_query.c @@ -282,7 +282,7 @@ static uint64_t iris_raw_timestamp_delta(uint64_t time0, uint64_t time1) { if (time0 > time1) { - return (1ULL << TIMESTAMP_BITS) + time1 - time0; + return (1ull << 36) + time1 - time0; } else { return time1 - time0; } @@ -309,12 +309,10 @@ calculate_result_on_cpu(const struct intel_device_info *devinfo, case PIPE_QUERY_TIMESTAMP_DISJOINT: /* The timestamp is the single starting snapshot. */ q->result = intel_device_info_timebase_scale(devinfo, q->map->start); - q->result &= (1ull << TIMESTAMP_BITS) - 1; break; case PIPE_QUERY_TIME_ELAPSED: q->result = iris_raw_timestamp_delta(q->map->start, q->map->end); q->result = intel_device_info_timebase_scale(devinfo, q->result); - q->result &= (1ull << TIMESTAMP_BITS) - 1; break; case PIPE_QUERY_SO_OVERFLOW_PREDICATE: q->result = stream_overflowed((void *) q->map, q->index); diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c index 5d8c3b49da6..0eb54494b25 100644 --- a/src/gallium/drivers/iris/iris_screen.c +++ b/src/gallium/drivers/iris/iris_screen.c @@ -429,9 +429,6 @@ iris_get_param(struct pipe_screen *pscreen, enum pipe_cap param) */ return devinfo->ver >= 11; - case PIPE_CAP_QUERY_TIMESTAMP_BITS: - return TIMESTAMP_BITS; - case PIPE_CAP_TIMER_RESOLUTION: return DIV_ROUND_UP(1000000000ull, devinfo->timestamp_frequency); @@ -657,7 +654,6 @@ iris_get_timestamp(struct pipe_screen *pscreen) return 0; result = intel_device_info_timebase_scale(screen->devinfo, result); - result &= (1ull << TIMESTAMP_BITS) - 1; return result; }