From 3b6a40af53ac5161aa3a533ea223adc1417f0810 Mon Sep 17 00:00:00 2001 From: Daniel Stone Date: Sat, 29 Mar 2025 17:52:10 +0000 Subject: [PATCH] ci: Make all job timeouts explicit Enforce a default job timeout of 1 minute, to make jobs which don't explicitly specify a timeout insta-fail, rather than potentially hanging around for an hour. Container builds get the full hour as they can run long and are not run in pre-merge context, and LAVA jobs also get the full hour as they have multiple internal timeout mechanisms which aim to fast-fail jobs once they actually start. However, as they just queue jobs to an external host (shared with other projects like KernelCI), these timeouts aren't reflected into the GitLab CI definitions. Signed-off-by: Daniel Stone Part-of: --- .gitlab-ci.yml | 1 + .gitlab-ci/container/gitlab-ci.yml | 1 + .gitlab-ci/lava/lava-gitlab-ci.yml | 13 +++++++++++++ 3 files changed, 15 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4b38cd0ecdf..7f8972174b0 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -124,6 +124,7 @@ variables: DATA_STORAGE_PATH: data_storage default: + timeout: 1m # catch any jobs which don't specify a timeout id_tokens: S3_JWT: aud: https://s3.freedesktop.org diff --git a/.gitlab-ci/container/gitlab-ci.yml b/.gitlab-ci/container/gitlab-ci.yml index 2a5a0fa1cea..1d5208234fe 100644 --- a/.gitlab-ci/container/gitlab-ci.yml +++ b/.gitlab-ci/container/gitlab-ci.yml @@ -52,6 +52,7 @@ .container: stage: container + timeout: 1h extends: - .container+build-rules - .incorporate-templates-commit diff --git a/.gitlab-ci/lava/lava-gitlab-ci.yml b/.gitlab-ci/lava/lava-gitlab-ci.yml index 7fdc774cdb9..23e376db18d 100644 --- a/.gitlab-ci/lava/lava-gitlab-ci.yml +++ b/.gitlab-ci/lava/lava-gitlab-ci.yml @@ -5,6 +5,19 @@ variables: .lava-test: # Cancel job if a newer commit is pushed to the same branch interruptible: true + # The jobs themselves shouldn't actually run for an hour, of course. 
+ # Jobs are picked up greedily by a GitLab CI runner which is deliberately + # overprovisioned compared to the number of available devices. They are + # submitted to the LAVA co-ordinator with a job priority which gives + # pre-merge priority over everyone else. User-submitted and nightly jobs + # can thus spend ages just waiting around in a queue to be run at some + # point as they get pre-empted by other things. + # Non-queue time has strict timeouts for each stage, e.g. for downloading + # the artifacts, booting the device, device setup, running the tests, etc, + # which are handled by LAVA itself. + # So the only reason we should see anyone bouncing off this timeout is due + # to a lack of available devices to run the jobs. + timeout: 1h variables: GIT_STRATEGY: none # testing doesn't build anything from source FDO_CI_CONCURRENT: 6 # should be replaced by per-machine definitions