From 41cd32d10eeae282644c7dbf397e95724fe9f7ac Mon Sep 17 00:00:00 2001 From: Guilherme Gallo Date: Mon, 19 Feb 2024 15:23:57 -0300 Subject: [PATCH] ci/lava: Broader R8152 error handling The r8152 error detection now matches the known patterns in any order, so that variations of the r8152 issues are detected during the test phase. This includes a small refactoring to make it easier to add new known issues in the future. Additionally, this adjusts when `start_time` is set in `test_lava_job_submitter.py` to ensure consistency and reliability in test execution, aligning the start time closer to the job submission process. With this fix, the bad state shown in the following job will be detected: https://gitlab.freedesktop.org/drm/msm/-/jobs/55033953 Signed-off-by: Guilherme Gallo Part-of: --- .gitlab-ci/lava/utils/constants.py | 4 +++ .gitlab-ci/lava/utils/lava_log_hints.py | 36 +++++++++++++-------- .gitlab-ci/tests/test_lava_job_submitter.py | 2 +- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/.gitlab-ci/lava/utils/constants.py b/.gitlab-ci/lava/utils/constants.py index 5d58aee1bd9..8a688fb04d3 100644 --- a/.gitlab-ci/lava/utils/constants.py +++ b/.gitlab-ci/lava/utils/constants.py @@ -15,6 +15,10 @@ FORCE_UART = bool(getenv("LAVA_FORCE_UART", False)) # How many times the r8152 error may happen to consider it a known issue. KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER: int = 10 +KNOWN_ISSUE_R8152_PATTERNS: tuple[str, ...] = ( + r"r8152 \S+ eth0: Tx status -71", + r"nfs: server \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} not responding, still trying", +) # This is considered noise, since LAVA produces this log after receiving a package of feedback # messages. 
diff --git a/.gitlab-ci/lava/utils/lava_log_hints.py b/.gitlab-ci/lava/utils/lava_log_hints.py index d07a8d9f47e..04c158eb34b 100644 --- a/.gitlab-ci/lava/utils/lava_log_hints.py +++ b/.gitlab-ci/lava/utils/lava_log_hints.py @@ -2,17 +2,28 @@ from __future__ import annotations import re from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Sequence if TYPE_CHECKING: from lava.utils import LogFollower from lava.exceptions import MesaCIKnownIssueException from lava.utils.console_format import CONSOLE_LOG -from lava.utils.constants import KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER, LOG_DEBUG_FEEDBACK_NOISE +from lava.utils.constants import ( + KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER, + LOG_DEBUG_FEEDBACK_NOISE, + KNOWN_ISSUE_R8152_PATTERNS, +) from lava.utils.log_section import LogSectionType +def search_known_issue_patterns(patterns: Sequence[str], line: str) -> str: + for pattern in patterns: + if re.search(pattern, line): + return pattern + return "" + + @dataclass class LAVALogHints: log_follower: LogFollower @@ -39,18 +50,17 @@ class LAVALogHints: LogSectionType.LAVA_BOOT, LogSectionType.TEST_CASE, ) and line["lvl"] in ("feedback", "target"): - if re.search(r"r8152 \S+ eth0: Tx status -71", line["msg"]): - self.r8152_issue_consecutive_counter += 1 - return - - if self.r8152_issue_consecutive_counter >= KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER: - if re.search( - r"nfs: server \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} not responding, still trying", - line["msg"], + if search_known_issue_patterns(KNOWN_ISSUE_R8152_PATTERNS, line["msg"]): + if ( + self.r8152_issue_consecutive_counter + < KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER ): - self.raise_known_issue( - "Probable network issue failure encountered, retrying the job" - ) + self.r8152_issue_consecutive_counter += 1 + return + + self.raise_known_issue( + "Probable network issue failure encountered, retrying the job" + ) # Reset the status, as the `nfs... 
still trying` complaint was not detected self.r8152_issue_consecutive_counter = 0 diff --git a/.gitlab-ci/tests/test_lava_job_submitter.py b/.gitlab-ci/tests/test_lava_job_submitter.py index 3f78ba73309..b599cbdd8ad 100644 --- a/.gitlab-ci/tests/test_lava_job_submitter.py +++ b/.gitlab-ci/tests/test_lava_job_submitter.py @@ -396,9 +396,9 @@ def test_full_yaml_log(mock_proxy, frozen_time, lava_job_submitter): proxy.scheduler.jobs.logs.side_effect = load_lines() proxy.scheduler.jobs.submit = reset_logs - start_time = datetime.now() try: time_travel_to_test_time() + start_time = datetime.now() retriable_follow_job(proxy, "") finally: try: