ci/lava: Broader R8152 error handling

The r8152 error detection now accepts the known patterns in any order,
so that variations of the r8152 issue are detected during the test phase.
This includes a small refactoring to accommodate possible future issues.

Additionally, the point at which `start_time` is set in
`test_lava_job_submitter.py` was adjusted to make test execution more
consistent and reliable, bringing the start time closer to the job
submission process.

With this fix, the bad state shown in the following job will be
detected:
https://gitlab.freedesktop.org/drm/msm/-/jobs/55033953

Signed-off-by: Guilherme Gallo <guilherme.gallo@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27688>
This commit is contained in:
Guilherme Gallo
2024-02-19 15:23:57 -03:00
committed by Marge Bot
parent c6635c09d0
commit 41cd32d10e
3 changed files with 28 additions and 14 deletions
+4
View File
@@ -15,6 +15,10 @@ FORCE_UART = bool(getenv("LAVA_FORCE_UART", False))
# How many times the r8152 error may happen to consider it a known issue.
KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER: int = 10
# Regular expressions that identify log lines belonging to the known r8152 issue.
# A line matching any one of these patterns counts as an occurrence of the issue.
KNOWN_ISSUE_R8152_PATTERNS: tuple[str, ...] = (
r"r8152 \S+ eth0: Tx status -71",
r"nfs: server \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} not responding, still trying",
)
# This is considered noise, since LAVA produces this log after receiving a package of feedback
# messages.
+23 -13
View File
@@ -2,17 +2,28 @@ from __future__ import annotations
import re
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, Sequence
if TYPE_CHECKING:
from lava.utils import LogFollower
from lava.exceptions import MesaCIKnownIssueException
from lava.utils.console_format import CONSOLE_LOG
from lava.utils.constants import KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER, LOG_DEBUG_FEEDBACK_NOISE
from lava.utils.constants import (
KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER,
LOG_DEBUG_FEEDBACK_NOISE,
KNOWN_ISSUE_R8152_PATTERNS,
)
from lava.utils.log_section import LogSectionType
def search_known_issue_patterns(patterns: Sequence[str], line: str) -> str:
    """Return the first pattern in *patterns* that matches *line*.

    Each pattern is treated as a regular expression and tried, in order,
    with ``re.search``, so a match anywhere in *line* counts.

    Args:
        patterns: Regular expression strings to try, in priority order.
        line: The log message text to inspect.

    Returns:
        The first matching pattern string, or an empty string when none of
        the patterns match (including when *patterns* is empty).
    """
    for pattern in patterns:
        if re.search(pattern, line):
            return pattern
    # Empty string doubles as the falsy "no match" sentinel for callers.
    return ""
@dataclass
class LAVALogHints:
log_follower: LogFollower
@@ -39,18 +50,17 @@ class LAVALogHints:
LogSectionType.LAVA_BOOT,
LogSectionType.TEST_CASE,
) and line["lvl"] in ("feedback", "target"):
if re.search(r"r8152 \S+ eth0: Tx status -71", line["msg"]):
self.r8152_issue_consecutive_counter += 1
return
if self.r8152_issue_consecutive_counter >= KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER:
if re.search(
r"nfs: server \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} not responding, still trying",
line["msg"],
if search_known_issue_patterns(KNOWN_ISSUE_R8152_PATTERNS, line["msg"]):
if (
self.r8152_issue_consecutive_counter
< KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER
):
self.raise_known_issue(
"Probable network issue failure encountered, retrying the job"
)
self.r8152_issue_consecutive_counter += 1
return
self.raise_known_issue(
"Probable network issue failure encountered, retrying the job"
)
# Reset the status, as the `nfs... still trying` complaint was not detected
self.r8152_issue_consecutive_counter = 0
+1 -1
View File
@@ -396,9 +396,9 @@ def test_full_yaml_log(mock_proxy, frozen_time, lava_job_submitter):
proxy.scheduler.jobs.logs.side_effect = load_lines()
proxy.scheduler.jobs.submit = reset_logs
start_time = datetime.now()
try:
time_travel_to_test_time()
start_time = datetime.now()
retriable_follow_job(proxy, "")
finally:
try: