From 9481aad815189d6251d36c11e3f901f9179dab40 Mon Sep 17 00:00:00 2001 From: pmikus Date: Sat, 6 Mar 2021 21:03:37 +0000 Subject: Infra: Add reschedule policy Add reschedule policy to jobs so in case of failure they will respawn in an endless loop Signed-off-by: pmikus Change-Id: I15698d9e147644e68bec549fc53474d421b25d9a --- .../alertmanager/conf/nomad/alertmanager.hcl | 23 ++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'terraform-ci-infra/1n_nmd/alertmanager/conf') diff --git a/terraform-ci-infra/1n_nmd/alertmanager/conf/nomad/alertmanager.hcl b/terraform-ci-infra/1n_nmd/alertmanager/conf/nomad/alertmanager.hcl index 40d84e337a..6b0d669d0e 100644 --- a/terraform-ci-infra/1n_nmd/alertmanager/conf/nomad/alertmanager.hcl +++ b/terraform-ci-infra/1n_nmd/alertmanager/conf/nomad/alertmanager.hcl @@ -71,6 +71,17 @@ job "${job_name}" { %{ endif } } + # The reschedule stanza specifies the group's rescheduling strategy. If + # specified at the job level, the configuration will apply to all groups + # within the job. If the reschedule stanza is present on both the job and the + # group, they are merged with the group stanza taking the highest precedence + # and then the job. + reschedule { + delay = "30s" + delay_function = "constant" + unlimited = true + } + # The "group" stanza defines a series of tasks that should be co-located on # the same Nomad client. Any task within a group will be placed on the same # client. @@ -86,6 +97,18 @@ job "${job_name}" { # to 1. count = ${group_count} + # The restart stanza configures a task's behavior on task failure. Restarts + # happen on the client that is running the task. + # + # https://www.nomadproject.io/docs/job-specification/restart + # + restart { + interval = "30m" + attempts = 40 + delay = "15s" + mode = "delay" + } + # The constraint allows restricting the set of eligible nodes. Constraints # may filter on attributes or client metadata. # -- cgit 1.2.3-korg