From 9024e0df8359163818707750ee942e38ad28d41d Mon Sep 17 00:00:00 2001 From: Guilherme Gallo Date: Wed, 28 May 2025 12:00:52 -0300 Subject: [PATCH] ci/lava: Don't fail if the section times mismatches Time drift can occur during LAVA job execution due to transitions between three different clocks. The process begins in the GitLab job [1], using the CI_JOB_STARTED_AT variable. If SSH is enabled, we then connect to the DUT through an Alpine-based SSH client container inside the LAVA dispatcher [2], where some GitLab-related steps are timestamped by lava_job_submitter. Finally, the DUT [3] runs and uses the setup-test-env.sh helper to handle GitLab sections, potentially using a third distinct clock. Signed-off-by: Guilherme Gallo Part-of: --- .gitlab-ci/lava/utils/gitlab_section.py | 40 +++++++++++++----- .gitlab-ci/tests/utils/test_lava_log.py | 55 +++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 10 deletions(-) diff --git a/.gitlab-ci/lava/utils/gitlab_section.py b/.gitlab-ci/lava/utils/gitlab_section.py index e0371d1ac4a..f4fea438750 100644 --- a/.gitlab-ci/lava/utils/gitlab_section.py +++ b/.gitlab-ci/lava/utils/gitlab_section.py @@ -68,6 +68,9 @@ class GitlabSection: before_header = ":".join([preamble, timestamp, section_id]) if self.timestamp_relative_to and self.start_time is not None: delta = self.start_time - self.timestamp_relative_to + # time drift can occur because we are dealing with timestamps from different sources + # clamp the delta to 0 if it's negative + delta = max(delta, timedelta(seconds=0)) reltime = f"[{floor(delta.seconds / 60):02}:{(delta.seconds % 60):02}] " else: reltime = "" @@ -112,11 +115,14 @@ class GitlabSection: return self.section(marker="start", header=self.header, time=self.__start_time) def end(self) -> str: - assert self.has_started, "Ending an uninitialized section" + assert self.__start_time is not None, "Ending an uninitialized section" self.__end_time = datetime.now(tz=UTC) - assert ( - self.__start_time is not None and self.__end_time >= self.__start_time - ), "Section execution time will be negative" + if self.__end_time < self.__start_time: + print( + CONSOLE_LOG["FG_YELLOW"] + + f"Warning: Section {self.id} ended before it started, clamping the delta time to 0" + + CONSOLE_LOG["RESET"] + ) return self.print_end_section() def print_end_section(self) -> str: @@ -126,12 +132,26 @@ class GitlabSection: raise RuntimeError("End time is not set.") return self.section(marker="end", header="", time=self.__end_time) - def delta_time(self) -> Optional[timedelta]: - if self.__start_time is not None and self.__end_time is not None: - return self.__end_time - self.__start_time + def _delta_time(self) -> Optional[timedelta]: + """ + Return the delta time between the start and end of the section. + If the section has not ended, return the delta time between the start and now. + If the section has not started and not ended, return None. + """ + if self.__start_time is None: + return None - if self.has_started: - assert self.__start_time is not None + if self.__end_time is None: return datetime.now(tz=UTC) - self.__start_time - return None + return self.__end_time - self.__start_time + + def delta_time(self) -> Optional[timedelta]: + """ + Clamp the delta time to zero if it's negative, time drift can occur since we have timestamps + coming from GitLab jobs, LAVA dispatcher and DUTs. + """ + delta = self._delta_time() + if delta is None: + return None + return max(delta, timedelta(seconds=0)) diff --git a/.gitlab-ci/tests/utils/test_lava_log.py b/.gitlab-ci/tests/utils/test_lava_log.py index 887a72dfedf..7554de6a7a3 100644 --- a/.gitlab-ci/tests/utils/test_lava_log.py +++ b/.gitlab-ci/tests/utils/test_lava_log.py @@ -445,3 +445,58 @@ def test_detect_a6xx_gpu_recovery_success(frozen_time): "a6xx_gpu_recovery_fail_counter is not 0" ) + +@pytest.mark.parametrize( + "start_offset", + [ + timedelta(hours=0), + timedelta(hours=1), + ], + ids=["equal timestamps", "negative delta"], +) +def test_gitlab_section_relative_time_clamping(start_offset): + """Test that delta time is clamped to zero if start_time <= timestamp_relative_to.""" + now = datetime.now(tz=UTC) + timestamp_relative_to = now + start_offset + gs = GitlabSection( + id="clamp_section", + header=f"clamp_section header {start_offset}", + type=LogSectionType.TEST_CASE, + timestamp_relative_to=timestamp_relative_to, + ) + gs.start() + output = gs.print_start_section() + assert "[00:00]" in output, f"Expected clamped relative time, got: {output}" + + +@pytest.mark.parametrize( + "delta_seconds,expected_seconds", + [ + (-5, 0), # Negative delta should be clamped to 0 + (0, 0), # Zero delta should remain 0 + (5, 5), # Positive delta should remain unchanged + ], + ids=["negative delta", "zero delta", "positive delta"], +) +def test_gitlab_section_delta_time(frozen_time, delta_seconds, expected_seconds): + """Test that delta_time() properly clamps negative deltas to zero.""" + gs = GitlabSection( + id="delta_section", + header=f"delta_section header {delta_seconds}", + type=LogSectionType.TEST_CASE, + ) + + with gs: + frozen_time.tick(delta_seconds) + + # Test internal _delta_time() returns exact delta + internal_delta = gs._delta_time() + assert internal_delta == timedelta(seconds=delta_seconds), ( + f"_delta_time() returned {internal_delta}, expected {timedelta(seconds=delta_seconds)}" + ) + + # Test public delta_time() returns clamped delta + clamped_delta = gs.delta_time() + assert clamped_delta == timedelta(seconds=expected_seconds), ( + f"delta_time() returned {clamped_delta}, expected {timedelta(seconds=expected_seconds)}" + )