ci/lava: Broader R8152 error handling
The r8152 error detection now considers the known patterns in any order, so that variations of the r8152 issue are detected during the test phase. This includes a small refactoring that makes it easier to handle new issues in the future. Additionally, the point where `start_time` is set in `test_lava_job_submitter.py` was adjusted to ensure consistency and reliability in test execution, aligning the start time closer to the job submission process. With this fix, the bad state shown in the following job will be detected: https://gitlab.freedesktop.org/drm/msm/-/jobs/55033953 Signed-off-by: Guilherme Gallo <guilherme.gallo@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27688>
This commit is contained in:
committed by
Marge Bot
parent
c6635c09d0
commit
41cd32d10e
@@ -15,6 +15,10 @@ FORCE_UART = bool(getenv("LAVA_FORCE_UART", False))
|
||||
|
||||
# How many consecutive times the r8152 error may happen before it is considered a known issue.
|
||||
KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER: int = 10
|
||||
KNOWN_ISSUE_R8152_PATTERNS: tuple[str, ...] = (
|
||||
r"r8152 \S+ eth0: Tx status -71",
|
||||
r"nfs: server \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} not responding, still trying",
|
||||
)
|
||||
|
||||
# This is considered noise, since LAVA produces this log after receiving a package of feedback
|
||||
# messages.
|
||||
|
||||
@@ -2,17 +2,28 @@ from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import TYPE_CHECKING, Any
|
||||
from typing import TYPE_CHECKING, Any, Sequence
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from lava.utils import LogFollower
|
||||
|
||||
from lava.exceptions import MesaCIKnownIssueException
|
||||
from lava.utils.console_format import CONSOLE_LOG
|
||||
from lava.utils.constants import KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER, LOG_DEBUG_FEEDBACK_NOISE
|
||||
from lava.utils.constants import (
|
||||
KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER,
|
||||
LOG_DEBUG_FEEDBACK_NOISE,
|
||||
KNOWN_ISSUE_R8152_PATTERNS,
|
||||
)
|
||||
from lava.utils.log_section import LogSectionType
|
||||
|
||||
|
||||
def search_known_issue_patterns(patterns: Sequence[str], line: str) -> str:
    """Return the first pattern in *patterns* that matches *line*.

    Each pattern is treated as a regular expression and searched for anywhere
    in ``line`` with ``re.search``. Patterns are tried in the order given.

    Args:
        patterns: Regular expression strings to try.
        line: The text to inspect.

    Returns:
        The first matching pattern string, or an empty string when none of
        the patterns match. The empty string is falsy, so the result can be
        used directly as a condition.
    """
    for pattern in patterns:
        if re.search(pattern, line):
            return pattern

    # No pattern matched: signal it with a falsy value instead of None, so the
    # return type stays a plain ``str``.
    return ""
|
||||
|
||||
|
||||
@dataclass
|
||||
class LAVALogHints:
|
||||
log_follower: LogFollower
|
||||
@@ -39,18 +50,17 @@ class LAVALogHints:
|
||||
LogSectionType.LAVA_BOOT,
|
||||
LogSectionType.TEST_CASE,
|
||||
) and line["lvl"] in ("feedback", "target"):
|
||||
if re.search(r"r8152 \S+ eth0: Tx status -71", line["msg"]):
|
||||
self.r8152_issue_consecutive_counter += 1
|
||||
return
|
||||
|
||||
if self.r8152_issue_consecutive_counter >= KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER:
|
||||
if re.search(
|
||||
r"nfs: server \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} not responding, still trying",
|
||||
line["msg"],
|
||||
if search_known_issue_patterns(KNOWN_ISSUE_R8152_PATTERNS, line["msg"]):
|
||||
if (
|
||||
self.r8152_issue_consecutive_counter
|
||||
< KNOWN_ISSUE_R8152_MAX_CONSECUTIVE_COUNTER
|
||||
):
|
||||
self.raise_known_issue(
|
||||
"Probable network issue failure encountered, retrying the job"
|
||||
)
|
||||
self.r8152_issue_consecutive_counter += 1
|
||||
return
|
||||
|
||||
self.raise_known_issue(
|
||||
"Probable network issue failure encountered, retrying the job"
|
||||
)
|
||||
|
||||
# Reset the counter, as this line did not match any of the known r8152 issue patterns
|
||||
self.r8152_issue_consecutive_counter = 0
|
||||
|
||||
@@ -396,9 +396,9 @@ def test_full_yaml_log(mock_proxy, frozen_time, lava_job_submitter):
|
||||
proxy.scheduler.jobs.logs.side_effect = load_lines()
|
||||
|
||||
proxy.scheduler.jobs.submit = reset_logs
|
||||
start_time = datetime.now()
|
||||
try:
|
||||
time_travel_to_test_time()
|
||||
start_time = datetime.now()
|
||||
retriable_follow_job(proxy, "")
|
||||
finally:
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user