ci/lava: Improve exception handling
Move exceptions to their own file. Create MesaCITimeoutError and MesaCIRetryError with specific exception data for better exception classification. Avoid the use of `fatal_err` in favor of raising a proper exception. Make _call_proxy exception handling exhaustive and add the missing ResponseError treatment. Also, detect JobError during job result parsing. When a LAVA timeout error happens, it is probably caused by some boot/network issue with a specific device, so we can retry the same job on another device with the same device_type. Signed-off-by: Guilherme Gallo <guilherme.gallo@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15938>
This commit is contained in:
committed by
Marge Bot
parent
5fc333d0b6
commit
4ffd21ca70
107
.gitlab-ci/tests/lava/helpers.py
Normal file
107
.gitlab-ci/tests/lava/helpers.py
Normal file
@@ -0,0 +1,107 @@
|
||||
from contextlib import nullcontext as does_not_raise
|
||||
from datetime import datetime, timedelta
|
||||
from itertools import cycle
|
||||
from typing import Callable, Generator, Iterable, Tuple, Union
|
||||
|
||||
import yaml
|
||||
from freezegun import freeze_time
|
||||
from lava.utils.lava_log import (
|
||||
DEFAULT_GITLAB_SECTION_TIMEOUTS,
|
||||
FALLBACK_GITLAB_SECTION_TIMEOUT,
|
||||
LogSectionType,
|
||||
)
|
||||
|
||||
|
||||
def section_timeout(section_type: LogSectionType) -> int:
    """Return the timeout configured for ``section_type`` in whole seconds.

    The value is looked up in ``DEFAULT_GITLAB_SECTION_TIMEOUTS``; section
    types missing from that mapping use ``FALLBACK_GITLAB_SECTION_TIMEOUT``.
    The fractional part of ``total_seconds()`` is truncated by ``int``.
    """
    timeout = DEFAULT_GITLAB_SECTION_TIMEOUTS.get(
        section_type, FALLBACK_GITLAB_SECTION_TIMEOUT
    )
    return int(timeout.total_seconds())
|
||||
|
||||
|
||||
def create_lava_yaml_msg(
    dt: Callable = datetime.now, msg="test", lvl="target"
) -> dict[str, str]:
    """Build one mocked LAVA log entry.

    Args:
        dt: Zero-argument callable producing the timestamp; its result is
            converted with ``str``.
        msg: Value stored under the ``"msg"`` key.
        lvl: Value stored under the ``"lvl"`` key.

    Returns:
        A dict with the keys ``"dt"``, ``"msg"`` and ``"lvl"``, in that order.
    """
    timestamp = str(dt())
    return dict(dt=timestamp, msg=msg, lvl=lvl)
|
||||
|
||||
|
||||
def jobs_logs_response(finished=False, msg=None, **kwargs) -> Tuple[bool, str]:
    """Build a mocked ``(finished, logs)`` pair with YAML-serialized logs.

    Args:
        finished: Flag returned unchanged as the first tuple element.
        msg: Log payload to serialize. When ``None``, a single message built
            by ``create_lava_yaml_msg(**kwargs)`` is used instead.
        **kwargs: Forwarded to ``create_lava_yaml_msg``.

    Returns:
        ``(finished, yaml.safe_dump(payload))``.
    """
    # Built unconditionally: kwargs are forwarded (and therefore validated)
    # even when an explicit ``msg`` payload is supplied.
    default_line = create_lava_yaml_msg(**kwargs)

    if msg is None:
        payload = [default_line]
    else:
        payload = msg

    serialized = yaml.safe_dump(payload)
    return finished, serialized
|
||||
|
||||
|
||||
def message_generator_new(
    messages: dict[LogSectionType, Iterable[int]]
) -> Iterable[tuple[dict, Iterable[int]]]:
    """Yield a ``(signal, ticks)`` pair for every member of ``LogSectionType``.

    Args:
        messages: Mapping from section type to the ticks associated with it.
            Section types absent from the mapping get the fallback ``[1]``.

    Yields:
        The mocked signal from ``mock_lava_signal`` for the section, paired
        with that section's ticks.
    """
    # One shared list object serves as the fallback for every missing section.
    fallback_ticks = [1]
    for section in LogSectionType:
        ticks = messages.get(section, fallback_ticks)
        signal = mock_lava_signal(section)
        yield signal, ticks
|
||||
|
||||
|
||||
def message_generator():
    """Yield the mocked LAVA signal for each ``LogSectionType`` member in turn."""
    yield from (mock_lava_signal(section) for section in LogSectionType)
|
||||
|
||||
|
||||
def generate_n_logs(
    n=0,
    tick_fn: Union[Generator, Iterable[int], int] = 1,
    message_fn=message_generator,
):
    """Endlessly yield mocked log responses while advancing frozen time.

    Each pass of the outer loop replays every message produced by
    ``message_fn()`` as an unfinished response, moving the frozen clock
    forward after each one, and then yields one finished response.

    Args:
        n: Not read by this function.
            NOTE(review): callers pass it, but it has no effect here; confirm
            whether it was meant to bound the number of runs.
        tick_fn: A generator, an iterable of ticks, or a single tick value;
            normalized by ``to_iterable``.
        message_fn: Zero-argument callable returning an iterable of log
            messages for one run.

    Yields:
        The ``(finished, logs)`` pairs built by ``jobs_logs_response``.
    """
    # Reuse the shared helper rather than repeating its isinstance ladder.
    tick_gen = to_iterable(tick_fn)

    with freeze_time(datetime.now()) as time_travel:
        # Only the first tick value is consumed; every step advances the
        # frozen clock by that same amount.
        tick_sec: int = next(tick_gen)
        while True:
            # Simulate a complete run given by message_fn
            for msg in message_fn():
                yield jobs_logs_response(finished=False, msg=[msg])
                time_travel.tick(tick_sec)

            yield jobs_logs_response(finished=True)
|
||||
|
||||
|
||||
def to_iterable(tick_fn):
    """Normalize ``tick_fn`` into an iterator of tick values.

    Args:
        tick_fn: A generator, any other iterable, or a single value.

    Returns:
        The generator itself when one is given; otherwise an endless
        ``itertools.cycle`` over the iterable's items, or over the single
        value when ``tick_fn`` is not iterable.
    """
    # Generators are also Iterable, so they must be checked first to be
    # passed through untouched instead of being wrapped in a cycle.
    if isinstance(tick_fn, Generator):
        return tick_fn
    if isinstance(tick_fn, Iterable):
        return cycle(tick_fn)
    return cycle((tick_fn,))
|
||||
|
||||
|
||||
def mock_logs(
    messages=None,
):
    """Yield mocked log responses for one complete run under frozen time.

    For every ``(msg, tick_list)`` pair produced by
    ``message_generator_new(messages)``, the message is yielded once per tick
    as an unfinished response, and the frozen clock is advanced by that tick
    after each yield. A single finished response closes the run.

    Args:
        messages: Mapping passed to ``message_generator_new``. ``None`` (the
            default) is treated as an empty mapping.

    Yields:
        The ``(finished, logs)`` pairs built by ``jobs_logs_response``.
    """
    # A None sentinel avoids sharing one mutable default dict across calls.
    if messages is None:
        messages = {}

    with freeze_time(datetime.now()) as time_travel:
        # Simulate a complete run over the messages of every section
        for msg, tick_list in message_generator_new(messages):
            for tick_sec in tick_list:
                yield jobs_logs_response(finished=False, msg=[msg])
                time_travel.tick(tick_sec)

        yield jobs_logs_response(finished=True)
|
||||
|
||||
|
||||
def mock_lava_signal(type: LogSectionType) -> dict[str, str]:
    """Return the mocked LAVA log message associated with a section type.

    Args:
        type: Section type to build a message for. The name shadows the
            builtin but is kept so keyword callers keep working.

    Returns:
        The message from ``create_lava_yaml_msg``. ``TEST_CASE``,
        ``TEST_SUITE`` and ``LAVA_POST_PROCESSING`` have dedicated ``msg`` and
        ``lvl`` values; any other section type gets the default message.
    """
    # Keep only the keyword arguments in the table, so exactly one message is
    # built per call instead of one per table entry plus the default.
    signal_kwargs = {
        LogSectionType.TEST_CASE: {"msg": "<STARTTC> case", "lvl": "debug"},
        LogSectionType.TEST_SUITE: {"msg": "<STARTRUN> suite", "lvl": "debug"},
        LogSectionType.LAVA_POST_PROCESSING: {
            "msg": "<LAVA_SIGNAL_ENDTC case>",
            "lvl": "target",
        },
    }
    return create_lava_yaml_msg(**signal_kwargs.get(type, {}))
|
||||
@@ -32,6 +32,7 @@ from unittest.mock import MagicMock, patch
|
||||
import pytest
|
||||
import yaml
|
||||
from freezegun import freeze_time
|
||||
from lava.exceptions import MesaCIException, MesaCIRetryError, MesaCITimeoutError
|
||||
from lava.lava_job_submitter import (
|
||||
DEVICE_HANGING_TIMEOUT_SEC,
|
||||
NUMBER_OF_RETRIES_TIMEOUT_DETECTION,
|
||||
@@ -120,7 +121,7 @@ def frozen_time(mock_sleep):
|
||||
|
||||
@pytest.mark.parametrize("exception", [RuntimeError, SystemError, KeyError])
|
||||
def test_submit_and_follow_respects_exceptions(mock_sleep, mock_proxy, exception):
|
||||
with pytest.raises(exception):
|
||||
with pytest.raises(MesaCIException):
|
||||
proxy = mock_proxy(side_effect=exception)
|
||||
job = LAVAJob(proxy, '')
|
||||
follow_job_execution(job)
|
||||
@@ -168,7 +169,7 @@ PROXY_SCENARIOS = {
|
||||
),
|
||||
"timed out more times than retry attempts": (
|
||||
generate_n_logs(n=4, tick_fn=DEVICE_HANGING_TIMEOUT_SEC + 1),
|
||||
pytest.raises(SystemExit),
|
||||
pytest.raises(MesaCIRetryError),
|
||||
False,
|
||||
{},
|
||||
),
|
||||
@@ -211,7 +212,7 @@ PROXY_SCENARIOS = {
|
||||
),
|
||||
"very long silence": (
|
||||
generate_n_logs(n=NUMBER_OF_MAX_ATTEMPTS + 1, tick_fn=100000),
|
||||
pytest.raises(SystemExit),
|
||||
pytest.raises(MesaCIRetryError),
|
||||
False,
|
||||
{},
|
||||
),
|
||||
|
||||
Reference in New Issue
Block a user