aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fdio.infra.terraform/1n_nmd/alertmanager/versions.tf (renamed from fdio.infra.terraform/1n_nmd/alertmanager/providers.tf)4
-rw-r--r--fdio.infra.terraform/1n_nmd/grafana/versions.tf (renamed from fdio.infra.terraform/1n_nmd/grafana/providers.tf)4
-rw-r--r--fdio.infra.terraform/1n_nmd/main.tf121
-rw-r--r--fdio.infra.terraform/1n_nmd/minio_s3_gateway/conf/nomad/minio.hcl246
-rw-r--r--fdio.infra.terraform/1n_nmd/minio_s3_gateway/main.tf51
-rw-r--r--fdio.infra.terraform/1n_nmd/minio_s3_gateway/variables.tf199
-rw-r--r--fdio.infra.terraform/1n_nmd/minio_s3_gateway/versions.tf (renamed from fdio.infra.terraform/1n_nmd/prometheus/providers.tf)4
-rw-r--r--fdio.infra.terraform/1n_nmd/prometheus/conf/nomad/prometheus.hcl7
-rw-r--r--fdio.infra.terraform/1n_nmd/prometheus/versions.tf (renamed from fdio.infra.terraform/1n_nmd/vpp_device/providers.tf)4
-rw-r--r--fdio.infra.terraform/1n_nmd/providers.tf26
-rw-r--r--fdio.infra.terraform/1n_nmd/terraform.tfstate631
-rw-r--r--fdio.infra.terraform/1n_nmd/terraform.tfstate.backup635
-rw-r--r--fdio.infra.terraform/1n_nmd/variables.tf32
-rw-r--r--fdio.infra.terraform/1n_nmd/versions.tf21
-rw-r--r--fdio.infra.terraform/1n_nmd/vpp_device/versions.tf13
-rw-r--r--resources/tools/storage/__main__.py8
16 files changed, 661 insertions, 1345 deletions
diff --git a/fdio.infra.terraform/1n_nmd/alertmanager/providers.tf b/fdio.infra.terraform/1n_nmd/alertmanager/versions.tf
index 1399201d21..b80610a525 100644
--- a/fdio.infra.terraform/1n_nmd/alertmanager/providers.tf
+++ b/fdio.infra.terraform/1n_nmd/alertmanager/versions.tf
@@ -2,11 +2,11 @@ terraform {
required_providers {
nomad = {
source = "hashicorp/nomad"
- version = "~> 1.4.9"
+ version = "~> 1.4.15"
}
template = {
source = "hashicorp/template"
- version = "~> 2.1.2"
+ version = "~> 2.2.0"
}
}
required_version = ">= 1.0.3"
diff --git a/fdio.infra.terraform/1n_nmd/grafana/providers.tf b/fdio.infra.terraform/1n_nmd/grafana/versions.tf
index 1399201d21..b80610a525 100644
--- a/fdio.infra.terraform/1n_nmd/grafana/providers.tf
+++ b/fdio.infra.terraform/1n_nmd/grafana/versions.tf
@@ -2,11 +2,11 @@ terraform {
required_providers {
nomad = {
source = "hashicorp/nomad"
- version = "~> 1.4.9"
+ version = "~> 1.4.15"
}
template = {
source = "hashicorp/template"
- version = "~> 2.1.2"
+ version = "~> 2.2.0"
}
}
required_version = ">= 1.0.3"
diff --git a/fdio.infra.terraform/1n_nmd/main.tf b/fdio.infra.terraform/1n_nmd/main.tf
index a8a1bb9315..ed4f2b5ac3 100644
--- a/fdio.infra.terraform/1n_nmd/main.tf
+++ b/fdio.infra.terraform/1n_nmd/main.tf
@@ -60,62 +60,91 @@ module "grafana" {
grafana_port = 3000
}
-module "minio" {
- source = "./minio"
+#module "minio" {
+# source = "./minio"
+# providers = {
+# nomad = nomad.yul1
+# }
+#
+# # nomad
+# nomad_datacenters = ["yul1"]
+# nomad_host_volume = "prod-volume-data1-1"
+#
+# # minio
+# minio_job_name = "prod-minio"
+# minio_group_count = 4
+# minio_service_name = "storage"
+# minio_host = "http://10.32.8.1{4...7}"
+# minio_port = 9000
+# minio_container_image = "minio/minio:RELEASE.2021-07-27T02-40-15Z"
+# minio_vault_secret = {
+# use_vault_provider = false,
+# vault_kv_policy_name = "kv-secret",
+# vault_kv_path = "secret/data/minio",
+# vault_kv_field_access_key = "access_key",
+# vault_kv_field_secret_key = "secret_key"
+# }
+# minio_data_dir = "/data/"
+# minio_use_host_volume = true
+# minio_use_canary = true
+# minio_envs = ["MINIO_BROWSER=\"off\""]
+#
+# minio_buckets = ["logs.fd.io"]
+#}
+
+data "vault_generic_secret" "minio_creds" {
+ path = "kv/secret/data/minio"
+}
+
+module "minio_s3_gateway" {
+ source = "./minio_s3_gateway"
providers = {
nomad = nomad.yul1
}
# nomad
- nomad_datacenters = ["yul1"]
- nomad_host_volume = "prod-volume-data1-1"
+ datacenters = ["yul1"]
+ volume_source = "prod-volume-data1-1"
# minio
- minio_job_name = "prod-minio"
- minio_group_count = 4
- minio_service_name = "storage"
- minio_host = "http://10.32.8.1{4...7}"
- minio_port = 9000
- minio_container_image = "minio/minio:RELEASE.2021-07-27T02-40-15Z"
- minio_vault_secret = {
- use_vault_provider = false,
- vault_kv_policy_name = "kv-secret",
- vault_kv_path = "secret/data/minio",
- vault_kv_field_access_key = "access_key",
- vault_kv_field_secret_key = "secret_key"
- }
- minio_data_dir = "/data/"
- minio_use_host_volume = true
- minio_use_canary = true
- minio_envs = ["MINIO_BROWSER=\"off\""]
-
- # minio client
- mc_job_name = "prod-mc"
- mc_container_image = "minio/mc:RELEASE.2021-07-27T02-40-15Z"
- mc_extra_commands = [
- "mc policy set public LOCALMINIO/logs.fd.io",
- "mc policy set public LOCALMINIO/docs.fd.io",
- "mc ilm add --expiry-days '180' LOCALMINIO/logs.fd.io",
- "mc admin user add LOCALMINIO storage Storage1234",
- "mc admin policy set LOCALMINIO writeonly user=storage"
+ job_name = "minio-s3-gateway"
+ group_count = 4
+ service_name = "minio"
+ mode = "gateway"
+ port_base = 9001
+ port_console = 9002
+ image = "minio/minio:latest"
+ access_key = data.vault_generic_secret.minio_creds.data["access_key"]
+ secret_key = data.vault_generic_secret.minio_creds.data["secret_key"]
+ volume_destination = "/data/"
+ use_host_volume = true
+ use_canary = true
+ envs = [
+ "MINIO_BROWSER=\"off\"",
+ "MINIO_CACHE=\"on\"",
+ "MINIO_CACHE_DRIVES=\"/data/s3_cache1\"",
+ "MINIO_CACHE_EXCLUDE=\"\"",
+ "MINIO_CACHE_QUOTA=80",
+ "MINIO_CACHE_AFTER=1",
+ "MINIO_CACHE_WATERMARK_LOW=70",
+ "MINIO_CACHE_WATERMARK_HIGH=90"
]
- minio_buckets = ["logs.fd.io", "docs.fd.io"]
}
-module "nginx" {
- source = "./nginx"
- providers = {
- nomad = nomad.yul1
- }
-
- # nomad
- nomad_datacenters = ["yul1"]
- nomad_host_volume = "prod-volume-data1-1"
-
- # nginx
- nginx_job_name = "prod-nginx"
- nginx_use_host_volume = true
-}
+#module "nginx" {
+# source = "./nginx"
+# providers = {
+# nomad = nomad.yul1
+# }
+#
+# # nomad
+# nomad_datacenters = ["yul1"]
+# nomad_host_volume = "prod-volume-data1-1"
+#
+# # nginx
+# nginx_job_name = "prod-nginx"
+# nginx_use_host_volume = true
+#}
module "prometheus" {
source = "./prometheus"
diff --git a/fdio.infra.terraform/1n_nmd/minio_s3_gateway/conf/nomad/minio.hcl b/fdio.infra.terraform/1n_nmd/minio_s3_gateway/conf/nomad/minio.hcl
new file mode 100644
index 0000000000..6210040b0c
--- /dev/null
+++ b/fdio.infra.terraform/1n_nmd/minio_s3_gateway/conf/nomad/minio.hcl
@@ -0,0 +1,246 @@
+job "${job_name}" {
+ # The "region" parameter specifies the region in which to execute the job.
+ # If omitted, this inherits the default region name of "global".
+ # region = "${region}"
+
+ # The "datacenters" parameter specifies the list of datacenters which should
+ # be considered when placing this task. This must be provided.
+ datacenters = "${datacenters}"
+
+ # The "type" parameter controls the type of job, which impacts the scheduler's
+ # decision on placement. This configuration is optional and defaults to
+ # "service". For a full list of job types and their differences, please see
+ # the online documentation.
+ #
+ # https://www.nomadproject.io/docs/jobspec/schedulers
+ #
+ type = "service"
+
+ update {
+ # The "max_parallel" parameter specifies the maximum number of updates to
+ # perform in parallel. In this case, this specifies to update a single task
+ # at a time.
+ max_parallel = ${max_parallel}
+
+ health_check = "checks"
+
+ # The "min_healthy_time" parameter specifies the minimum time the allocation
+ # must be in the healthy state before it is marked as healthy and unblocks
+ # further allocations from being updated.
+ min_healthy_time = "10s"
+
+ # The "healthy_deadline" parameter specifies the deadline in which the
+ # allocation must be marked as healthy after which the allocation is
+ # automatically transitioned to unhealthy. Transitioning to unhealthy will
+ # fail the deployment and potentially roll back the job if "auto_revert" is
+ # set to true.
+ healthy_deadline = "3m"
+
+ # The "progress_deadline" parameter specifies the deadline in which an
+ # allocation must be marked as healthy. The deadline begins when the first
+ # allocation for the deployment is created and is reset whenever an allocation
+ # as part of the deployment transitions to a healthy state. If no allocation
+ # transitions to the healthy state before the progress deadline, the
+ # deployment is marked as failed.
+ progress_deadline = "10m"
+
+%{ if use_canary }
+ # The "canary" parameter specifies that changes to the job that would result
+ # in destructive updates should create the specified number of canaries
+ # without stopping any previous allocations. Once the operator determines the
+ # canaries are healthy, they can be promoted which unblocks a rolling update
+ # of the remaining allocations at a rate of "max_parallel".
+ #
+ # Further, setting "canary" equal to the count of the task group allows
+ # blue/green deployments. When the job is updated, a full set of the new
+ # version is deployed and upon promotion the old version is stopped.
+ canary = ${canary}
+
+ # Specifies if the job should auto-promote to the canary version when all
+ # canaries become healthy during a deployment. Defaults to false which means
+ # canaries must be manually updated with the nomad deployment promote
+ # command.
+ auto_promote = ${auto_promote}
+
+ # The "auto_revert" parameter specifies if the job should auto-revert to the
+ # last stable job on deployment failure. A job is marked as stable if all the
+ # allocations as part of its deployment were marked healthy.
+ auto_revert = ${auto_revert}
+%{ endif }
+ }
+
+ # All groups in this job should be scheduled on different hosts.
+ constraint {
+ operator = "distinct_hosts"
+ value = "true"
+ }
+
+ # The "group" stanza defines a series of tasks that should be co-located on
+ # the same Nomad client. Any task within a group will be placed on the same
+ # client.
+ #
+ # https://www.nomadproject.io/docs/job-specification/group
+ #
+ group "${job_name}-group-1" {
+ # The "count" parameter specifies the number of the task groups that should
+ # be running under this group. This value must be non-negative and defaults
+ # to 1.
+ count = ${group_count}
+
+ # The volume stanza allows the group to specify that it requires a given
+ # volume from the cluster. The key of the stanza is the name of the volume
+ # as it will be exposed to task configuration.
+ #
+ # https://www.nomadproject.io/docs/job-specification/volume
+ %{ if use_host_volume }
+ volume "${job_name}-volume-1" {
+ type = "host"
+ read_only = false
+ source = "${volume_source}"
+ }
+ %{ endif }
+
+ # The restart stanza configures a tasks's behavior on task failure. Restarts
+ # happen on the client that is running the task.
+ #
+ # https://www.nomadproject.io/docs/job-specification/restart
+ #
+ restart {
+ interval = "30m"
+ attempts = 40
+ delay = "15s"
+ mode = "delay"
+ }
+
+ # The network stanza specifies the networking requirements for the task
+ # group, including the network mode and port allocations. When scheduling
+ # jobs in Nomad they are provisioned across your fleet of machines along
+ # with other jobs and services. Because you don't know in advance what host
+ # your job will be provisioned on, Nomad will provide your tasks with
+ # network configuration when they start up.
+ #
+ # https://www.nomadproject.io/docs/job-specification/network
+ #
+ network {
+ port "base" {
+ static = ${port_base}
+ to = ${port_base}
+ }
+ port "console" {
+ static = ${port_console}
+ to = ${port_console}
+ }
+ }
+
+ # The "task" stanza creates an individual unit of work, such as a Docker
+ # container, web application, or batch processing.
+ #
+ # https://www.nomadproject.io/docs/job-specification/task.html
+ #
+ task "${job_name}-task-1" {
+ # The "driver" parameter specifies the task driver that should be used to
+ # run the task.
+ driver = "exec"
+
+ %{ if use_host_volume }
+ volume_mount {
+ volume = "${job_name}-volume-1"
+ destination = "${volume_destination}"
+ read_only = false
+ }
+ %{ endif }
+
+ %{ if use_vault_provider }
+ vault {
+ policies = "${vault_kv_policy_name}"
+ }
+ %{ endif }
+
+ # The "config" stanza specifies the driver configuration, which is passed
+ # directly to the driver to start the task. The details of configurations
+ # are specific to each driver, so please see specific driver
+ # documentation for more information.
+ config {
+ args = [
+ "${mode}", "s3",
+ "-address", ":${port_base}",
+ "-console-address", ":${port_console}"
+ ]
+ command = "local/minio"
+ }
+
+ # The artifact stanza instructs Nomad to fetch and unpack a remote resource,
+ # such as a file, tarball, or binary. Nomad downloads artifacts using the
+ # popular go-getter library, which permits downloading artifacts from a
+ # variety of locations using a URL as the input source.
+ #
+ # For more information and examples on the "artifact" stanza, please see
+ # the online documentation at:
+ #
+ # https://www.nomadproject.io/docs/job-specification/artifact
+ #
+ artifact {
+ source = "https://dl.min.io/server/minio/release/linux-amd64/minio"
+ }
+
+ # The env stanza configures a list of environment variables to populate
+ # the task's environment before starting.
+ env {
+%{ if use_vault_provider }
+{{ with secret "${vault_kv_path}" }}
+ MINIO_ROOT_USER = "{{ .Data.data.${vault_kv_field_access_key} }}"
+ MINIO_ROOT_PASSWORD = "{{ .Data.data.${vault_kv_field_secret_key} }}"
+{{ end }}
+%{ else }
+ MINIO_ROOT_USER = "${access_key}"
+ MINIO_ROOT_PASSWORD = "${secret_key}"
+ AWS_ACCESS_KEY_ID = "${access_key}"
+ AWS_SECRET_ACCESS_KEY = "${secret_key}"
+%{ endif }
+ ${ envs }
+ }
+
+ # The service stanza instructs Nomad to register a service with Consul.
+ #
+ # https://www.nomadproject.io/docs/job-specification/service
+ #
+ service {
+ name = "${service_name}"
+ port = "base"
+ tags = [ "${service_name}$${NOMAD_ALLOC_INDEX}" ]
+ check {
+ name = "Min.io Server HTTP Check Live"
+ type = "http"
+ port = "base"
+ protocol = "http"
+ method = "GET"
+ path = "/minio/health/live"
+ interval = "10s"
+ timeout = "2s"
+ }
+ check {
+ name = "Min.io Server HTTP Check Ready"
+ type = "http"
+ port = "base"
+ protocol = "http"
+ method = "GET"
+ path = "/minio/health/ready"
+ interval = "10s"
+ timeout = "2s"
+ }
+ }
+
+ # The "resources" stanza describes the requirements a task needs to
+ # execute. Resource requirements include memory, network, cpu, and more.
+ # This ensures the task will execute on a machine that contains enough
+ # resource capacity.
+ #
+ # https://www.nomadproject.io/docs/job-specification/resources
+ #
+ resources {
+ cpu = ${cpu}
+ memory = ${memory}
+ }
+ }
+ }
+}
diff --git a/fdio.infra.terraform/1n_nmd/minio_s3_gateway/main.tf b/fdio.infra.terraform/1n_nmd/minio_s3_gateway/main.tf
new file mode 100644
index 0000000000..2ae3cac9c2
--- /dev/null
+++ b/fdio.infra.terraform/1n_nmd/minio_s3_gateway/main.tf
@@ -0,0 +1,51 @@
+locals {
+ datacenters = join(",", var.datacenters)
+ envs = join("\n", concat([], var.envs))
+ upstreams = jsonencode(var.upstreams)
+}
+
+data "template_file" "nomad_job_minio" {
+ template = file("${path.module}/conf/nomad/minio.hcl")
+ vars = {
+ access_key = var.access_key
+ auto_promote = var.auto_promote
+ auto_revert = var.auto_revert
+ canary = var.canary
+ cpu = var.cpu
+ cpu_proxy = var.resource_proxy.cpu
+ datacenters = local.datacenters
+ envs = local.envs
+ group_count = var.group_count
+ host = var.host
+ image = var.image
+ job_name = var.job_name
+ max_parallel = var.max_parallel
+ memory = var.memory
+ memory_proxy = var.resource_proxy.memory
+ mode = var.mode
+ port_base = var.port_base
+ port_console = var.port_console
+ region = var.region
+ secret_key = var.secret_key
+ service_name = var.service_name
+ use_canary = var.use_canary
+ use_host_volume = var.use_host_volume
+ upstreams = local.upstreams
+ use_vault_kms = var.kms_variables.use_vault_kms
+ use_vault_provider = var.vault_secret.use_vault_provider
+ vault_address = var.kms_variables.vault_address
+ vault_kms_approle_kv = var.kms_variables.vault_kms_approle_kv
+ vault_kms_key_name = var.kms_variables.vault_kms_key_name
+ vault_kv_policy_name = var.vault_secret.vault_kv_policy_name
+ vault_kv_path = var.vault_secret.vault_kv_path
+ vault_kv_field_access_key = var.vault_secret.vault_kv_field_access_key
+ vault_kv_field_secret_key = var.vault_secret.vault_kv_field_secret_key
+ volume_destination = var.volume_destination
+ volume_source = var.volume_source
+ }
+}
+
+resource "nomad_job" "nomad_job_minio" {
+ jobspec = data.template_file.nomad_job_minio.rendered
+ detach = false
+}
diff --git a/fdio.infra.terraform/1n_nmd/minio_s3_gateway/variables.tf b/fdio.infra.terraform/1n_nmd/minio_s3_gateway/variables.tf
new file mode 100644
index 0000000000..b9f790b01f
--- /dev/null
+++ b/fdio.infra.terraform/1n_nmd/minio_s3_gateway/variables.tf
@@ -0,0 +1,199 @@
+# Nomad
+
+variable "datacenters" {
+ description = "Specifies the list of DCs to be considered placing this task"
+ type = list(string)
+ default = ["dc1"]
+}
+
+variable "region" {
+ description = "Specifies the list of DCs to be considered placing this task"
+ type = string
+ default = "global"
+}
+
+variable "volume_source" {
+ description = "The name of the volume to request"
+ type = string
+ default = "persistence"
+}
+
+# Minio
+variable "access_key" {
+ description = "Minio access key"
+ type = string
+ default = "minio"
+}
+
+variable "auto_promote" {
+ description = "Specifies if the job should auto-promote to the canary version"
+ type = bool
+ default = true
+}
+
+variable "auto_revert" {
+ description = "Specifies if the job should auto-revert to the last stable job"
+ type = bool
+ default = true
+}
+
+variable "canary" {
+ description = "Equal to the count of the task group allows blue/green depl."
+ type = number
+ default = 1
+}
+
+variable "cpu" {
+ description = "Specifies the CPU required to run this task in MHz"
+ type = number
+ default = 1000
+}
+
+variable "envs" {
+ description = "Minio environment variables"
+ type = list(string)
+ default = []
+}
+
+variable "group_count" {
+ description = "Specifies the number of the task groups running under this one"
+ type = number
+ default = 1
+}
+
+variable "host" {
+ description = "Minio host"
+ type = string
+ default = "127.0.0.1"
+}
+
+variable "image" {
+ description = "The Docker image to run"
+ type = string
+ default = "minio/minio:latest"
+}
+
+variable "job_name" {
+ description = "Specifies a name for the job"
+ type = string
+ default = "minio"
+}
+
+variable "kms_variables" {
+ type = object({
+ use_vault_kms = string
+ vault_address = string,
+ vault_kms_approle_kv = string,
+ vault_kms_key_name = string
+ })
+ description = "Set of properties to be able to transit secrets in vault"
+ default = {
+ use_vault_kms = false
+ vault_address = "",
+ vault_kms_approle_kv = "",
+ vault_kms_key_name = ""
+ }
+}
+
+variable "max_parallel" {
+ description = "Specifies the maximum number of updates to perform in parallel"
+ type = number
+ default = 1
+}
+
+variable "memory" {
+ description = "Specifies the memory required in MB"
+ type = number
+ default = 1024
+}
+
+variable "mode" {
+ description = "Specifies the Minio mode"
+ type = string
+ default = "server"
+}
+
+variable "port_base" {
+ description = "Specifies the static TCP/UDP port to allocate"
+ type = number
+ default = 9000
+}
+
+variable "port_console" {
+ description = "Specifies the static TCP/UDP port to allocate"
+ type = number
+ default = 9001
+}
+
+variable "resource_proxy" {
+ description = "Minio proxy resources"
+ type = object({
+ cpu = number,
+ memory = number
+ })
+ default = {
+ cpu = 2000,
+ memory = 1024
+ }
+ validation {
+ condition = var.resource_proxy.cpu >= 200 && var.resource_proxy.memory >= 128
+ error_message = "Proxy resource must be at least: cpu=200, memory=128."
+ }
+}
+
+variable "service_name" {
+ description = "Specifies the name this service will be advertised in Consul"
+ type = string
+ default = "AKIAVWHHCAXZJUHH7QFN"
+}
+
+variable "secret_key" {
+ description = "Minio secret key"
+ type = string
+ default = "SUSnhzMhoZTqteGJeGM7ry8cmL95fhksRloeL55j"
+}
+
+variable "upstreams" {
+ type = list(object({
+ service_name = string,
+ port = number,
+ }))
+ description = "List of upstream services"
+ default = []
+}
+
+variable "use_canary" {
+ description = "Uses canary deployment for Minio"
+ type = bool
+ default = false
+}
+
+variable "use_host_volume" {
+ description = "Use Nomad host volume feature"
+ type = bool
+ default = false
+}
+
+variable "vault_secret" {
+ type = object({
+ use_vault_provider = bool,
+ vault_kv_policy_name = string,
+ vault_kv_path = string,
+ vault_kv_field_access_key = string,
+ vault_kv_field_secret_key = string
+ })
+ description = "Set of properties to be able to fetch secret from vault"
+ default = {
+ use_vault_provider = false
+ vault_kv_policy_name = "kv"
+ vault_kv_path = "secret/data/minio"
+ vault_kv_field_access_key = "access_key"
+ vault_kv_field_secret_key = "secret_key"
+ }
+}
+
+variable "volume_destination" {
+ description = "Specifies where the volume should be mounted inside the task"
+ type = string
+ default = "/data/"
+}
diff --git a/fdio.infra.terraform/1n_nmd/prometheus/providers.tf b/fdio.infra.terraform/1n_nmd/minio_s3_gateway/versions.tf
index 1399201d21..b80610a525 100644
--- a/fdio.infra.terraform/1n_nmd/prometheus/providers.tf
+++ b/fdio.infra.terraform/1n_nmd/minio_s3_gateway/versions.tf
@@ -2,11 +2,11 @@ terraform {
required_providers {
nomad = {
source = "hashicorp/nomad"
- version = "~> 1.4.9"
+ version = "~> 1.4.15"
}
template = {
source = "hashicorp/template"
- version = "~> 2.1.2"
+ version = "~> 2.2.0"
}
}
required_version = ">= 1.0.3"
diff --git a/fdio.infra.terraform/1n_nmd/prometheus/conf/nomad/prometheus.hcl b/fdio.infra.terraform/1n_nmd/prometheus/conf/nomad/prometheus.hcl
index 368a361184..2019aeba96 100644
--- a/fdio.infra.terraform/1n_nmd/prometheus/conf/nomad/prometheus.hcl
+++ b/fdio.infra.terraform/1n_nmd/prometheus/conf/nomad/prometheus.hcl
@@ -596,6 +596,13 @@ scrape_configs:
- server: '{{ env "NOMAD_IP_prometheus" }}:8500'
services: [ 'storage' ]
metrics_path: /minio/prometheus/metrics
+
+ - job_name: 'Minio Proxy'
+ bearer_token: eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJleHAiOjQ3OTAwNjE1NDIsImlzcyI6InByb21ldGhldXMiLCJzdWIiOiJBS0lBUTJBSDdZUFBXVDZDV1hYSSJ9.CU9x9j-yO0_Uta5iep6yqNiGQPolrr2608E3lpU6Yg21rIv_eOwS5zqzXaSvrhzkJP9H5kO1Pj6kqjYhbqjN_w
+ consul_sd_configs:
+ - server: '{{ env "NOMAD_IP_prometheus" }}:8500'
+ services: [ 'minio' ]
+ metrics_path: /minio/v2/metrics/cluster
EOH
}
diff --git a/fdio.infra.terraform/1n_nmd/vpp_device/providers.tf b/fdio.infra.terraform/1n_nmd/prometheus/versions.tf
index 1399201d21..b80610a525 100644
--- a/fdio.infra.terraform/1n_nmd/vpp_device/providers.tf
+++ b/fdio.infra.terraform/1n_nmd/prometheus/versions.tf
@@ -2,11 +2,11 @@ terraform {
required_providers {
nomad = {
source = "hashicorp/nomad"
- version = "~> 1.4.9"
+ version = "~> 1.4.15"
}
template = {
source = "hashicorp/template"
- version = "~> 2.1.2"
+ version = "~> 2.2.0"
}
}
required_version = ">= 1.0.3"
diff --git a/fdio.infra.terraform/1n_nmd/providers.tf b/fdio.infra.terraform/1n_nmd/providers.tf
index c3b9ec275d..92ddb553e7 100644
--- a/fdio.infra.terraform/1n_nmd/providers.tf
+++ b/fdio.infra.terraform/1n_nmd/providers.tf
@@ -1,21 +1,13 @@
-terraform {
- required_providers {
- nomad = {
- source = "hashicorp/nomad"
- version = "~> 1.4.9"
- }
- template = {
- source = "hashicorp/template"
- version = "~> 2.1.2"
- }
- vault = {
- version = ">=2.14.0"
- }
- }
- required_version = ">= 0.13"
-}
-
provider "nomad" {
address = var.nomad_provider_address
alias = "yul1"
+ # ca_file = var.nomad_provider_ca_file
+ # cert_file = var.nomad_provider_cert_file
+ # key_file = var.nomad_provider_key_file
+}
+
+provider "vault" {
+ address = "http://10.30.51.28:8200"
+ skip_tls_verify = true
+ token = var.token
} \ No newline at end of file
diff --git a/fdio.infra.terraform/1n_nmd/terraform.tfstate b/fdio.infra.terraform/1n_nmd/terraform.tfstate
deleted file mode 100644
index 701b8a34ae..0000000000
--- a/fdio.infra.terraform/1n_nmd/terraform.tfstate
+++ /dev/null
@@ -1,631 +0,0 @@
-{
- "version": 4,
- "terraform_version": "1.0.4",
- "serial": 1192,
- "lineage": "e4e7f30a-652d-7a31-e31c-5e3a3388c9b9",
- "outputs": {},
- "resources": [
- {
- "module": "module.alertmanager",
- "mode": "data",
- "type": "template_file",
- "name": "nomad_job_alertmanager",
- "provider": "provider[\"registry.terraform.io/hashicorp/template\"]",
- "instances": [
- {
- "schema_version": 0,
- "attributes": {
- "filename": null,
- "id": "2cd0a107978c5fd503445b71a5ad611daf96b7b57ad1bc15e3cb5c7dee391806",
- "rendered": "job \"prod-alertmanager\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"yul1\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 1\n\n health_check = \"checks\"\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 1\n\n # Specifies if the job should auto-promote to the canary version when all\n # canaries become healthy during a deployment. Defaults to false which means\n # canaries must be manually updated with the nomad deployment promote\n # command.\n auto_promote = true\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = true\n\n }\n\n # The reschedule stanza specifies the group's rescheduling strategy. If\n # specified at the job level, the configuration will apply to all groups\n # within the job. If the reschedule stanza is present on both the job and the\n # group, they are merged with the group stanza taking the highest precedence\n # and then the job.\n reschedule {\n delay = \"30s\"\n delay_function = \"constant\"\n unlimited = true\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group\n #\n group \"prod-group1-alertmanager\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 1\n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The constraint allows restricting the set of eligible nodes. Constraints\n # may filter on attributes or client metadata.\n #\n # For more information and examples on the \"volume\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/constraint\n #\n constraint {\n attribute = \"${attr.cpu.arch}\"\n operator = \"!=\"\n value = \"arm64\"\n }\n\n constraint {\n attribute = \"${node.class}\"\n value = \"builder\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task\n #\n task \"prod-task1-alertmanager\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"exec\"\n\n \n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n command = \"local/alertmanager-0.21.0.linux-amd64/alertmanager\"\n args = [\n \"--config.file=secrets/alertmanager.yml\"\n ]\n }\n\n # The artifact stanza instructs Nomad to fetch and unpack a remote resource,\n # such as a file, tarball, or binary. Nomad downloads artifacts using the\n # popular go-getter library, which permits downloading artifacts from a\n # variety of locations using a URL as the input source.\n #\n # For more information and examples on the \"artifact\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/artifact\n #\n artifact {\n source = \"https://github.com/prometheus/alertmanager/releases/download/v0.21.0/alertmanager-0.21.0.linux-amd64.tar.gz\"\n }\n\n # The \"template\" stanza instructs Nomad to manage a template, such as\n # a configuration file or script. This template can optionally pull data\n # from Consul or Vault to populate runtime configuration data.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/template\n #\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/alertmanager.yml\"\n left_delimiter = \"{{{\"\n right_delimiter = \"}}}\"\n data = \u003c\u003cEOH\n# The directory from which notification templates are read.\ntemplates:\n- '/etc/alertmanager/template/*.tmpl'\n\n#tls_config:\n# # CA certificate to validate the server certificate with.\n# ca_file: \u003cfilepath\u003e ]\n#\n# # Certificate and key files for client cert authentication to the server.\n# cert_file: \u003cfilepath\u003e\n# key_file: \u003cfilepath\u003e\n#\n# # ServerName extension to indicate the name of the server.\n# # http://tools.ietf.org/html/rfc4366#section-3.1\n# server_name: \u003cstring\u003e\n#\n# # Disable validation of the server certificate.\n# insecure_skip_verify: true\n\n# The root route on which each incoming alert enters.\nroute:\n receiver: 'default-slack-receiver'\n\n # The labels by which incoming alerts are grouped together. For example,\n # multiple alerts coming in for cluster=A and alertname=LatencyHigh would\n # be batched into a single group.\n #\n # To aggregate by all possible labels use '...' as the sole label name.\n # This effectively disables aggregation entirely, passing through all\n # alerts as-is. This is unlikely to be what you want, unless you have\n # a very low alert volume or your upstream notification system performs\n # its own grouping. Example: group_by: [...]\n group_by: ['alertname']\n\n # When a new group of alerts is created by an incoming alert, wait at\n # least 'group_wait' to send the initial notification.\n # This way ensures that you get multiple alerts for the same group that start\n # firing shortly after another are batched together on the first\n # notification.\n group_wait: 30s\n\n # When the first notification was sent, wait 'group_interval' to send a batch\n # of new alerts that started firing for that group.\n group_interval: 5m\n\n # If an alert has successfully been sent, wait 'repeat_interval' to\n # resend them.\n repeat_interval: 3h\n\n # All the above attributes are inherited by all child routes and can\n # overwritten on each.\n # The child route trees.\n routes:\n - match_re:\n alertname: JenkinsJob.*\n receiver: jenkins-slack-receiver\n routes:\n - match:\n severity: critical\n receiver: 'jenkins-slack-receiver'\n\n - match_re:\n service: .*\n receiver: default-slack-receiver\n routes:\n - match:\n severity: critical\n receiver: 'default-slack-receiver'\n\n# Inhibition rules allow to mute a set of alerts given that another alert is\n# firing.\n# We use this to mute any warning-level notifications if the same alert is\n# already critical.\ninhibit_rules:\n- source_match:\n severity: 'critical'\n target_match:\n severity: 'warning'\n equal: ['alertname', 'instance']\n\nreceivers:\n- name: 'jenkins-slack-receiver'\n slack_configs:\n - api_url: 'TE07RD1V1/B01U1NV9HV3/hKZXJJ74g2JcISq4K3QC1eG9'\n channel: '#fdio-jobs-monitoring'\n send_resolved: true\n icon_url: https://avatars3.githubusercontent.com/u/3380462\n title: |-\n [{{ .Status | toUpper }}{{ if eq .Status \"firing\" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .CommonLabels.alertname }} for {{ .CommonLabels.job }}\n {{- if gt (len .CommonLabels) (len .GroupLabels) -}}\n {{\" \"}}(\n {{- with .CommonLabels.Remove .GroupLabels.Names }}\n {{- range $index, $label := .SortedPairs -}}\n {{ if $index }}, {{ end }}\n {{- $label.Name }}=\"{{ $label.Value -}}\"\n {{- end }}\n {{- end -}}\n )\n {{- end }}\n text: \u003e-\n {{ range .Alerts -}}\n *Alert:* {{ .Annotations.summary }}{{ if .Labels.severity }} - `{{ .Labels.severity }}`{{ end }}\n\n *Description:* {{ .Annotations.description }}\n\n *Details:*\n {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`\n {{ end }}\n {{ end }}\n\n- name: 'default-slack-receiver'\n slack_configs:\n - api_url: 'TE07RD1V1/B01UUK23B6C/hZTcCu42FUv8d6rtirHtcYIi'\n channel: '#fdio-infra-monitoring'\n send_resolved: true\n icon_url: https://avatars3.githubusercontent.com/u/3380462\n title: |-\n [{{ .Status | toUpper }}{{ if eq .Status \"firing\" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .CommonLabels.alertname }} for {{ .CommonLabels.job }}\n {{- if gt (len .CommonLabels) (len .GroupLabels) -}}\n {{\" \"}}(\n {{- with .CommonLabels.Remove .GroupLabels.Names }}\n {{- range $index, $label := .SortedPairs -}}\n {{ if $index }}, {{ end }}\n {{- $label.Name }}=\"{{ $label.Value -}}\"\n {{- end }}\n {{- end -}}\n )\n {{- end }}\n text: \u003e-\n {{ range .Alerts -}}\n *Alert:* {{ .Annotations.summary }}{{ if .Labels.severity }} - `{{ .Labels.severity }}`{{ end }}\n\n *Description:* {{ .Annotations.description }}\n\n *Details:*\n {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`\n {{ end }}\n {{ end }}\nEOH\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/service\n #\n service {\n name = \"alertmanager\"\n port = \"alertmanager\"\n tags = [ \"alertmanager${NOMAD_ALLOC_INDEX}\" ]\n check {\n name = \"Alertmanager Check Live\"\n type = \"http\"\n path = \"/-/healthy\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources\n #\n resources {\n cpu = 1000\n memory = 1024\n # The network stanza specifies the networking requirements for the task\n # group, including the network mode and port allocations. When scheduling\n # jobs in Nomad they are provisioned across your fleet of machines along\n # with other jobs and services. Because you don't know in advance what host\n # your job will be provisioned on, Nomad will provide your tasks with\n # network configuration when they start up.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/network\n #\n network {\n port \"alertmanager\" {\n static = 9093\n }\n }\n }\n }\n }\n}",
- "template": "job \"${job_name}\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"${datacenters}\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 1\n\n health_check = \"checks\"\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n%{ if use_canary }\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 1\n\n # Specifies if the job should auto-promote to the canary version when all\n # canaries become healthy during a deployment. Defaults to false which means\n # canaries must be manually updated with the nomad deployment promote\n # command.\n auto_promote = true\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = true\n%{ endif }\n }\n\n # The reschedule stanza specifies the group's rescheduling strategy. If\n # specified at the job level, the configuration will apply to all groups\n # within the job. If the reschedule stanza is present on both the job and the\n # group, they are merged with the group stanza taking the highest precedence\n # and then the job.\n reschedule {\n delay = \"30s\"\n delay_function = \"constant\"\n unlimited = true\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group\n #\n group \"prod-group1-${service_name}\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = ${group_count}\n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The constraint allows restricting the set of eligible nodes. Constraints\n # may filter on attributes or client metadata.\n #\n # For more information and examples on the \"volume\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/constraint\n #\n constraint {\n attribute = \"$${attr.cpu.arch}\"\n operator = \"!=\"\n value = \"arm64\"\n }\n\n constraint {\n attribute = \"$${node.class}\"\n value = \"builder\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task\n #\n task \"prod-task1-${service_name}\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"exec\"\n\n %{ if use_vault_provider }\n vault {\n policies = \"${vault_kv_policy_name}\"\n }\n %{ endif }\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n command = \"local/alertmanager-${version}.linux-amd64/alertmanager\"\n args = [\n \"--config.file=secrets/alertmanager.yml\"\n ]\n }\n\n # The artifact stanza instructs Nomad to fetch and unpack a remote resource,\n # such as a file, tarball, or binary. Nomad downloads artifacts using the\n # popular go-getter library, which permits downloading artifacts from a\n # variety of locations using a URL as the input source.\n #\n # For more information and examples on the \"artifact\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/artifact\n #\n artifact {\n source = \"${url}\"\n }\n\n # The \"template\" stanza instructs Nomad to manage a template, such as\n # a configuration file or script. This template can optionally pull data\n # from Consul or Vault to populate runtime configuration data.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/template\n #\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/alertmanager.yml\"\n left_delimiter = \"{{{\"\n right_delimiter = \"}}}\"\n data = \u003c\u003cEOH\n# The directory from which notification templates are read.\ntemplates:\n- '/etc/alertmanager/template/*.tmpl'\n\n#tls_config:\n# # CA certificate to validate the server certificate with.\n# ca_file: \u003cfilepath\u003e ]\n#\n# # Certificate and key files for client cert authentication to the server.\n# cert_file: \u003cfilepath\u003e\n# key_file: \u003cfilepath\u003e\n#\n# # ServerName extension to indicate the name of the server.\n# # http://tools.ietf.org/html/rfc4366#section-3.1\n# server_name: \u003cstring\u003e\n#\n# # Disable validation of the server certificate.\n# insecure_skip_verify: true\n\n# The root route on which each incoming alert enters.\nroute:\n receiver: '${slack_default_receiver}'\n\n # The labels by which incoming alerts are grouped together. For example,\n # multiple alerts coming in for cluster=A and alertname=LatencyHigh would\n # be batched into a single group.\n #\n # To aggregate by all possible labels use '...' as the sole label name.\n # This effectively disables aggregation entirely, passing through all\n # alerts as-is. This is unlikely to be what you want, unless you have\n # a very low alert volume or your upstream notification system performs\n # its own grouping. Example: group_by: [...]\n group_by: ['alertname']\n\n # When a new group of alerts is created by an incoming alert, wait at\n # least 'group_wait' to send the initial notification.\n # This way ensures that you get multiple alerts for the same group that start\n # firing shortly after another are batched together on the first\n # notification.\n group_wait: 30s\n\n # When the first notification was sent, wait 'group_interval' to send a batch\n # of new alerts that started firing for that group.\n group_interval: 5m\n\n # If an alert has successfully been sent, wait 'repeat_interval' to\n # resend them.\n repeat_interval: 3h\n\n # All the above attributes are inherited by all child routes and can\n # overwritten on each.\n # The child route trees.\n routes:\n - match_re:\n alertname: JenkinsJob.*\n receiver: ${slack_jenkins_receiver}\n routes:\n - match:\n severity: critical\n receiver: '${slack_jenkins_receiver}'\n\n - match_re:\n service: .*\n receiver: ${slack_default_receiver}\n routes:\n - match:\n severity: critical\n receiver: '${slack_default_receiver}'\n\n# Inhibition rules allow to mute a set of alerts given that another alert is\n# firing.\n# We use this to mute any warning-level notifications if the same alert is\n# already critical.\ninhibit_rules:\n- source_match:\n severity: 'critical'\n target_match:\n severity: 'warning'\n equal: ['alertname', 'instance']\n\nreceivers:\n- name: '${slack_jenkins_receiver}'\n slack_configs:\n - api_url: '${slack_jenkins_api_key}'\n channel: '#${slack_jenkins_channel}'\n send_resolved: true\n icon_url: https://avatars3.githubusercontent.com/u/3380462\n title: |-\n [{{ .Status | toUpper }}{{ if eq .Status \"firing\" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .CommonLabels.alertname }} for {{ .CommonLabels.job }}\n {{- if gt (len .CommonLabels) (len .GroupLabels) -}}\n {{\" \"}}(\n {{- with .CommonLabels.Remove .GroupLabels.Names }}\n {{- range $index, $label := .SortedPairs -}}\n {{ if $index }}, {{ end }}\n {{- $label.Name }}=\"{{ $label.Value -}}\"\n {{- end }}\n {{- end -}}\n )\n {{- end }}\n text: \u003e-\n {{ range .Alerts -}}\n *Alert:* {{ .Annotations.summary }}{{ if .Labels.severity }} - `{{ .Labels.severity }}`{{ end }}\n\n *Description:* {{ .Annotations.description }}\n\n *Details:*\n {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`\n {{ end }}\n {{ end }}\n\n- name: '${slack_default_receiver}'\n slack_configs:\n - api_url: '${slack_default_api_key}'\n channel: '#${slack_default_channel}'\n send_resolved: true\n icon_url: https://avatars3.githubusercontent.com/u/3380462\n title: |-\n [{{ .Status | toUpper }}{{ if eq .Status \"firing\" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .CommonLabels.alertname }} for {{ .CommonLabels.job }}\n {{- if gt (len .CommonLabels) (len .GroupLabels) -}}\n {{\" \"}}(\n {{- with .CommonLabels.Remove .GroupLabels.Names }}\n {{- range $index, $label := .SortedPairs -}}\n {{ if $index }}, {{ end }}\n {{- $label.Name }}=\"{{ $label.Value -}}\"\n {{- end }}\n {{- end -}}\n )\n {{- end }}\n text: \u003e-\n {{ range .Alerts -}}\n *Alert:* {{ .Annotations.summary }}{{ if .Labels.severity }} - `{{ .Labels.severity }}`{{ end }}\n\n *Description:* {{ .Annotations.description }}\n\n *Details:*\n {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`\n {{ end }}\n {{ end }}\nEOH\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/service\n #\n service {\n name = \"${service_name}\"\n port = \"${service_name}\"\n tags = [ \"${service_name}$${NOMAD_ALLOC_INDEX}\" ]\n check {\n name = \"Alertmanager Check Live\"\n type = \"http\"\n path = \"/-/healthy\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources\n #\n resources {\n cpu = ${cpu}\n memory = ${mem}\n # The network stanza specifies the networking requirements for the task\n # group, including the network mode and port allocations. When scheduling\n # jobs in Nomad they are provisioned across your fleet of machines along\n # with other jobs and services. Because you don't know in advance what host\n # your job will be provisioned on, Nomad will provide your tasks with\n # network configuration when they start up.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/network\n #\n network {\n port \"${service_name}\" {\n static = ${port}\n }\n }\n }\n }\n }\n}",
- "vars": {
- "cpu": "1000",
- "datacenters": "yul1",
- "group_count": "1",
- "job_name": "prod-alertmanager",
- "mem": "1024",
- "port": "9093",
- "service_name": "alertmanager",
- "slack_default_api_key": "TE07RD1V1/B01UUK23B6C/hZTcCu42FUv8d6rtirHtcYIi",
- "slack_default_channel": "fdio-infra-monitoring",
- "slack_default_receiver": "default-slack-receiver",
- "slack_jenkins_api_key": "TE07RD1V1/B01U1NV9HV3/hKZXJJ74g2JcISq4K3QC1eG9",
- "slack_jenkins_channel": "fdio-jobs-monitoring",
- "slack_jenkins_receiver": "jenkins-slack-receiver",
- "url": "https://github.com/prometheus/alertmanager/releases/download/v0.21.0/alertmanager-0.21.0.linux-amd64.tar.gz",
- "use_canary": "true",
- "use_vault_provider": "false",
- "version": "0.21.0"
- }
- },
- "sensitive_attributes": []
- }
- ]
- },
- {
- "module": "module.alertmanager",
- "mode": "managed",
- "type": "nomad_job",
- "name": "nomad_job_alertmanager",
- "provider": "provider[\"registry.terraform.io/hashicorp/nomad\"].yul1",
- "instances": [
- {
- "schema_version": 0,
- "attributes": {
- "allocation_ids": [
- "74fdbd88-0e34-f990-3181-0413a373dcb2",
- "412155f9-a6d4-f17c-3898-1e1c7cc1912b"
- ],
- "datacenters": [
- "yul1"
- ],
- "deployment_id": "9b9e24ac-9932-f3eb-cc6e-cd64d41f8327",
- "deployment_status": "successful",
- "deregister_on_destroy": true,
- "deregister_on_id_change": true,
- "detach": false,
- "id": "prod-alertmanager",
- "jobspec": "job \"prod-alertmanager\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"yul1\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 1\n\n health_check = \"checks\"\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 1\n\n # Specifies if the job should auto-promote to the canary version when all\n # canaries become healthy during a deployment. Defaults to false which means\n # canaries must be manually updated with the nomad deployment promote\n # command.\n auto_promote = true\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = true\n\n }\n\n # The reschedule stanza specifies the group's rescheduling strategy. If\n # specified at the job level, the configuration will apply to all groups\n # within the job. If the reschedule stanza is present on both the job and the\n # group, they are merged with the group stanza taking the highest precedence\n # and then the job.\n reschedule {\n delay = \"30s\"\n delay_function = \"constant\"\n unlimited = true\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group\n #\n group \"prod-group1-alertmanager\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 1\n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The constraint allows restricting the set of eligible nodes. Constraints\n # may filter on attributes or client metadata.\n #\n # For more information and examples on the \"volume\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/constraint\n #\n constraint {\n attribute = \"${attr.cpu.arch}\"\n operator = \"!=\"\n value = \"arm64\"\n }\n\n constraint {\n attribute = \"${node.class}\"\n value = \"builder\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task\n #\n task \"prod-task1-alertmanager\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"exec\"\n\n \n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n command = \"local/alertmanager-0.21.0.linux-amd64/alertmanager\"\n args = [\n \"--config.file=secrets/alertmanager.yml\"\n ]\n }\n\n # The artifact stanza instructs Nomad to fetch and unpack a remote resource,\n # such as a file, tarball, or binary. Nomad downloads artifacts using the\n # popular go-getter library, which permits downloading artifacts from a\n # variety of locations using a URL as the input source.\n #\n # For more information and examples on the \"artifact\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/artifact\n #\n artifact {\n source = \"https://github.com/prometheus/alertmanager/releases/download/v0.21.0/alertmanager-0.21.0.linux-amd64.tar.gz\"\n }\n\n # The \"template\" stanza instructs Nomad to manage a template, such as\n # a configuration file or script. This template can optionally pull data\n # from Consul or Vault to populate runtime configuration data.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/template\n #\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/alertmanager.yml\"\n left_delimiter = \"{{{\"\n right_delimiter = \"}}}\"\n data = \u003c\u003cEOH\n# The directory from which notification templates are read.\ntemplates:\n- '/etc/alertmanager/template/*.tmpl'\n\n#tls_config:\n# # CA certificate to validate the server certificate with.\n# ca_file: \u003cfilepath\u003e ]\n#\n# # Certificate and key files for client cert authentication to the server.\n# cert_file: \u003cfilepath\u003e\n# key_file: \u003cfilepath\u003e\n#\n# # ServerName extension to indicate the name of the server.\n# # http://tools.ietf.org/html/rfc4366#section-3.1\n# server_name: \u003cstring\u003e\n#\n# # Disable validation of the server certificate.\n# insecure_skip_verify: true\n\n# The root route on which each incoming alert enters.\nroute:\n receiver: 'default-slack-receiver'\n\n # The labels by which incoming alerts are grouped together. For example,\n # multiple alerts coming in for cluster=A and alertname=LatencyHigh would\n # be batched into a single group.\n #\n # To aggregate by all possible labels use '...' as the sole label name.\n # This effectively disables aggregation entirely, passing through all\n # alerts as-is. This is unlikely to be what you want, unless you have\n # a very low alert volume or your upstream notification system performs\n # its own grouping. Example: group_by: [...]\n group_by: ['alertname']\n\n # When a new group of alerts is created by an incoming alert, wait at\n # least 'group_wait' to send the initial notification.\n # This way ensures that you get multiple alerts for the same group that start\n # firing shortly after another are batched together on the first\n # notification.\n group_wait: 30s\n\n # When the first notification was sent, wait 'group_interval' to send a batch\n # of new alerts that started firing for that group.\n group_interval: 5m\n\n # If an alert has successfully been sent, wait 'repeat_interval' to\n # resend them.\n repeat_interval: 3h\n\n # All the above attributes are inherited by all child routes and can\n # overwritten on each.\n # The child route trees.\n routes:\n - match_re:\n alertname: JenkinsJob.*\n receiver: jenkins-slack-receiver\n routes:\n - match:\n severity: critical\n receiver: 'jenkins-slack-receiver'\n\n - match_re:\n service: .*\n receiver: default-slack-receiver\n routes:\n - match:\n severity: critical\n receiver: 'default-slack-receiver'\n\n# Inhibition rules allow to mute a set of alerts given that another alert is\n# firing.\n# We use this to mute any warning-level notifications if the same alert is\n# already critical.\ninhibit_rules:\n- source_match:\n severity: 'critical'\n target_match:\n severity: 'warning'\n equal: ['alertname', 'instance']\n\nreceivers:\n- name: 'jenkins-slack-receiver'\n slack_configs:\n - api_url: 'TE07RD1V1/B01U1NV9HV3/hKZXJJ74g2JcISq4K3QC1eG9'\n channel: '#fdio-jobs-monitoring'\n send_resolved: true\n icon_url: https://avatars3.githubusercontent.com/u/3380462\n title: |-\n [{{ .Status | toUpper }}{{ if eq .Status \"firing\" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .CommonLabels.alertname }} for {{ .CommonLabels.job }}\n {{- if gt (len .CommonLabels) (len .GroupLabels) -}}\n {{\" \"}}(\n {{- with .CommonLabels.Remove .GroupLabels.Names }}\n {{- range $index, $label := .SortedPairs -}}\n {{ if $index }}, {{ end }}\n {{- $label.Name }}=\"{{ $label.Value -}}\"\n {{- end }}\n {{- end -}}\n )\n {{- end }}\n text: \u003e-\n {{ range .Alerts -}}\n *Alert:* {{ .Annotations.summary }}{{ if .Labels.severity }} - `{{ .Labels.severity }}`{{ end }}\n\n *Description:* {{ .Annotations.description }}\n\n *Details:*\n {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`\n {{ end }}\n {{ end }}\n\n- name: 'default-slack-receiver'\n slack_configs:\n - api_url: 'TE07RD1V1/B01UUK23B6C/hZTcCu42FUv8d6rtirHtcYIi'\n channel: '#fdio-infra-monitoring'\n send_resolved: true\n icon_url: https://avatars3.githubusercontent.com/u/3380462\n title: |-\n [{{ .Status | toUpper }}{{ if eq .Status \"firing\" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .CommonLabels.alertname }} for {{ .CommonLabels.job }}\n {{- if gt (len .CommonLabels) (len .GroupLabels) -}}\n {{\" \"}}(\n {{- with .CommonLabels.Remove .GroupLabels.Names }}\n {{- range $index, $label := .SortedPairs -}}\n {{ if $index }}, {{ end }}\n {{- $label.Name }}=\"{{ $label.Value -}}\"\n {{- end }}\n {{- end -}}\n )\n {{- end }}\n text: \u003e-\n {{ range .Alerts -}}\n *Alert:* {{ .Annotations.summary }}{{ if .Labels.severity }} - `{{ .Labels.severity }}`{{ end }}\n\n *Description:* {{ .Annotations.description }}\n\n *Details:*\n {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`\n {{ end }}\n {{ end }}\nEOH\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/service\n #\n service {\n name = \"alertmanager\"\n port = \"alertmanager\"\n tags = [ \"alertmanager${NOMAD_ALLOC_INDEX}\" ]\n check {\n name = \"Alertmanager Check Live\"\n type = \"http\"\n path = \"/-/healthy\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources\n #\n resources {\n cpu = 1000\n memory = 1024\n # The network stanza specifies the networking requirements for the task\n # group, including the network mode and port allocations. When scheduling\n # jobs in Nomad they are provisioned across your fleet of machines along\n # with other jobs and services. Because you don't know in advance what host\n # your job will be provisioned on, Nomad will provide your tasks with\n # network configuration when they start up.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/network\n #\n network {\n port \"alertmanager\" {\n static = 9093\n }\n }\n }\n }\n }\n}",
- "json": null,
- "modify_index": "9138748",
- "name": "prod-alertmanager",
- "namespace": "default",
- "policy_override": null,
- "purge_on_destroy": null,
- "region": "global",
- "task_groups": [
- {
- "count": 1,
- "meta": {},
- "name": "prod-group1-alertmanager",
- "task": [
- {
- "driver": "exec",
- "meta": {},
- "name": "prod-task1-alertmanager",
- "volume_mounts": []
- }
- ],
- "volumes": []
- }
- ],
- "type": "service"
- },
- "sensitive_attributes": [],
- "private": "bnVsbA==",
- "dependencies": [
- "module.alertmanager.data.template_file.nomad_job_alertmanager"
- ]
- }
- ]
- },
- {
- "module": "module.grafana",
- "mode": "data",
- "type": "template_file",
- "name": "nomad_job_grafana",
- "provider": "provider[\"registry.terraform.io/hashicorp/template\"]",
- "instances": [
- {
- "schema_version": 0,
- "attributes": {
- "filename": null,
- "id": "ce0ab0b08359da392c19c0f354b0f082630aa887ad51701986b5ce80b8aa39ea",
- "rendered": "job \"prod-grafana\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"yul1\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 1\n\n health_check = \"checks\"\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 1\n\n # Specifies if the job should auto-promote to the canary version when all\n # canaries become healthy during a deployment. Defaults to false which means\n # canaries must be manually updated with the nomad deployment promote\n # command.\n auto_promote = true\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = true\n\n }\n\n # The reschedule stanza specifies the group's rescheduling strategy. If\n # specified at the job level, the configuration will apply to all groups\n # within the job. If the reschedule stanza is present on both the job and the\n # group, they are merged with the group stanza taking the highest precedence\n # and then the job.\n reschedule {\n delay = \"30s\"\n delay_function = \"constant\"\n unlimited = true\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group\n #\n group \"prod-group1-grafana\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 1\n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The constraint allows restricting the set of eligible nodes. Constraints\n # may filter on attributes or client metadata.\n #\n # For more information and examples on the \"volume\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/constraint\n #\n constraint {\n attribute = \"${attr.cpu.arch}\"\n operator = \"!=\"\n value = \"arm64\"\n }\n\n constraint {\n attribute = \"${node.class}\"\n value = \"builder\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task\n #\n task \"prod-task1-grafana\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"grafana/grafana:7.3.7\"\n dns_servers = [ \"172.17.0.1\" ]\n volumes = [\n \"secrets/prometheus.yml:/etc/grafana/provisioning/datasources/prometheus.yml\",\n \"secrets/dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml\",\n \"secrets/grafana.ini:/etc/grafana/grafana.ini\",\n \"secrets/node_exporter.json:/etc/grafana/provisioning/dashboards/node_exporter.json\",\n \"secrets/nomad.json:/etc/grafana/provisioning/dashboards/nomad.json\",\n \"secrets/consul.json:/etc/grafana/provisioning/dashboards/consul.json\",\n \"secrets/prometheus.json:/etc/grafana/provisioning/dashboards/prometheus.json\",\n \"secrets/blackbox_exporter_http.json:/etc/grafana/provisioning/dashboards/blackbox_exporter_http.json\",\n \"secrets/blackbox_exporter_icmp.json:/etc/grafana/provisioning/dashboards/blackbox_exporter_icmp.json\"\n ]\n }\n\n artifact {\n # Prometheus Node Exporter\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/node_exporter.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Nomad\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/nomad.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Consul\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/consul.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Prometheus\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/prometheus.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Prometheus Blackbox Exporter HTTP\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/blackbox_exporter_http.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Prometheus Blackbox Exporter ICMP\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/blackbox_exporter_icmp.json\"\n destination = \"secrets/\"\n }\n\n # The \"template\" stanza instructs Nomad to manage a template, such as\n # a configuration file or script. This template can optionally pull data\n # from Consul or Vault to populate runtime configuration data.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/template\n #\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/prometheus.yml\"\n data = \u003c\u003cEOH\napiVersion: 1\ndatasources:\n- name: Prometheus\n type: prometheus\n access: direct\n orgId: 1\n url: http://prometheus.service.consul:9090\n basicAuth: false\n isDefault: true\n version: 1\n editable: false\nEOH\n }\n\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/dashboards.yml\"\n data = \u003c\u003cEOH\napiVersion: 1\nproviders:\n- name: dashboards\n type: file\n disableDeletion: false\n updateIntervalSeconds: 10\n allowUiUpdates: false\n options:\n path: /etc/grafana/provisioning/dashboards\n foldersFromFilesStructure: true\nEOH\n }\n\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/grafana.ini\"\n data = \u003c\u003cEOH\napp_mode = production\n\n[metrics]\nenabled = true\n\n[server]\nprotocol = http\nhttp_port = 3000\nroot_url = http://grafana.service.consul:3000\nenable_gzip = true\n;cert_file =\n;cert_key =\n\n[security]\nadmin_user = grafanauser\nadmin_password = Grafana1234\nsecret_key = SW2YcwTIb9zpOOhoPsMm\n\n[users]\nallow_sign_up = false\nallow_org_create = false\nauto_assign_org = true\nauto_assign_org_role = Viewer\ndefault_theme = dark\n\n[auth.basic]\nenabled = true\n\n[auth]\ndisable_login_form = false\ndisable_signout_menu = false\n\n[auth.anonymous]\nenabled = false\n\n[log]\nmode = console\nlevel = info\n\n[log.console]\nlevel = info\nformat = console\nEOH\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/service\n #\n service {\n name = \"grafana\"\n port = \"grafana\"\n tags = [ \"grafana${NOMAD_ALLOC_INDEX}\" ]\n check {\n name = \"Grafana Check Live\"\n type = \"http\"\n protocol = \"http\"\n tls_skip_verify = true\n path = \"/api/health\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources\n #\n resources {\n cpu = 1000\n memory = 2048\n # The network stanza specifies the networking requirements for the task\n # group, including the network mode and port allocations. When scheduling\n # jobs in Nomad they are provisioned across your fleet of machines along\n # with other jobs and services. Because you don't know in advance what host\n # your job will be provisioned on, Nomad will provide your tasks with\n # network configuration when they start up.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/network\n #\n network {\n port \"grafana\" {\n static = 3000\n }\n }\n }\n }\n }\n}",
- "template": "job \"${job_name}\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"${datacenters}\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 1\n\n health_check = \"checks\"\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n%{ if use_canary }\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 1\n\n # Specifies if the job should auto-promote to the canary version when all\n # canaries become healthy during a deployment. Defaults to false which means\n # canaries must be manually updated with the nomad deployment promote\n # command.\n auto_promote = true\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = true\n%{ endif }\n }\n\n # The reschedule stanza specifies the group's rescheduling strategy. If\n # specified at the job level, the configuration will apply to all groups\n # within the job. If the reschedule stanza is present on both the job and the\n # group, they are merged with the group stanza taking the highest precedence\n # and then the job.\n reschedule {\n delay = \"30s\"\n delay_function = \"constant\"\n unlimited = true\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group\n #\n group \"prod-group1-${service_name}\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = ${group_count}\n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The constraint allows restricting the set of eligible nodes. Constraints\n # may filter on attributes or client metadata.\n #\n # For more information and examples on the \"volume\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/constraint\n #\n constraint {\n attribute = \"$${attr.cpu.arch}\"\n operator = \"!=\"\n value = \"arm64\"\n }\n\n constraint {\n attribute = \"$${node.class}\"\n value = \"builder\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task\n #\n task \"prod-task1-${service_name}\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"${image}\"\n dns_servers = [ \"172.17.0.1\" ]\n volumes = [\n \"secrets/prometheus.yml:/etc/grafana/provisioning/datasources/prometheus.yml\",\n \"secrets/dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml\",\n \"secrets/grafana.ini:/etc/grafana/grafana.ini\",\n \"secrets/node_exporter.json:/etc/grafana/provisioning/dashboards/node_exporter.json\",\n \"secrets/nomad.json:/etc/grafana/provisioning/dashboards/nomad.json\",\n \"secrets/consul.json:/etc/grafana/provisioning/dashboards/consul.json\",\n \"secrets/prometheus.json:/etc/grafana/provisioning/dashboards/prometheus.json\",\n \"secrets/blackbox_exporter_http.json:/etc/grafana/provisioning/dashboards/blackbox_exporter_http.json\",\n \"secrets/blackbox_exporter_icmp.json:/etc/grafana/provisioning/dashboards/blackbox_exporter_icmp.json\"\n ]\n }\n\n artifact {\n # Prometheus Node Exporter\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/node_exporter.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Nomad\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/nomad.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Consul\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/consul.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Prometheus\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/prometheus.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Prometheus Blackbox Exporter HTTP\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/blackbox_exporter_http.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Prometheus Blackbox Exporter ICMP\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/blackbox_exporter_icmp.json\"\n destination = \"secrets/\"\n }\n\n # The \"template\" stanza instructs Nomad to manage a template, such as\n # a configuration file or script. This template can optionally pull data\n # from Consul or Vault to populate runtime configuration data.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/template\n #\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/prometheus.yml\"\n data = \u003c\u003cEOH\napiVersion: 1\ndatasources:\n- name: Prometheus\n type: prometheus\n access: direct\n orgId: 1\n url: http://prometheus.service.consul:9090\n basicAuth: false\n isDefault: true\n version: 1\n editable: false\nEOH\n }\n\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/dashboards.yml\"\n data = \u003c\u003cEOH\napiVersion: 1\nproviders:\n- name: dashboards\n type: file\n disableDeletion: false\n updateIntervalSeconds: 10\n allowUiUpdates: false\n options:\n path: /etc/grafana/provisioning/dashboards\n foldersFromFilesStructure: true\nEOH\n }\n\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/grafana.ini\"\n data = \u003c\u003cEOH\napp_mode = production\n\n[metrics]\nenabled = true\n\n[server]\nprotocol = http\nhttp_port = ${port}\nroot_url = http://${service_name}.service.consul:${port}\nenable_gzip = true\n;cert_file =\n;cert_key =\n\n[security]\nadmin_user = grafanauser\nadmin_password = Grafana1234\nsecret_key = SW2YcwTIb9zpOOhoPsMm\n\n[users]\nallow_sign_up = false\nallow_org_create = false\nauto_assign_org = true\nauto_assign_org_role = Viewer\ndefault_theme = dark\n\n[auth.basic]\nenabled = true\n\n[auth]\ndisable_login_form = false\ndisable_signout_menu = false\n\n[auth.anonymous]\nenabled = false\n\n[log]\nmode = console\nlevel = info\n\n[log.console]\nlevel = info\nformat = console\nEOH\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/service\n #\n service {\n name = \"${service_name}\"\n port = \"${service_name}\"\n tags = [ \"${service_name}$${NOMAD_ALLOC_INDEX}\" ]\n check {\n name = \"Grafana Check Live\"\n type = \"http\"\n protocol = \"http\"\n tls_skip_verify = true\n path = \"/api/health\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources\n #\n resources {\n cpu = ${cpu}\n memory = ${mem}\n # The network stanza specifies the networking requirements for the task\n # group, including the network mode and port allocations. When scheduling\n # jobs in Nomad they are provisioned across your fleet of machines along\n # with other jobs and services. Because you don't know in advance what host\n # your job will be provisioned on, Nomad will provide your tasks with\n # network configuration when they start up.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/network\n #\n network {\n port \"${service_name}\" {\n static = ${port}\n }\n }\n }\n }\n }\n}",
- "vars": {
- "cpu": "1000",
- "datacenters": "yul1",
- "group_count": "1",
- "image": "grafana/grafana:7.3.7",
- "job_name": "prod-grafana",
- "mem": "2048",
- "port": "3000",
- "service_name": "grafana",
- "use_canary": "true",
- "use_vault_provider": "false"
- }
- },
- "sensitive_attributes": []
- }
- ]
- },
- {
- "module": "module.grafana",
- "mode": "managed",
- "type": "nomad_job",
- "name": "nomad_job_grafana",
- "provider": "provider[\"registry.terraform.io/hashicorp/nomad\"].yul1",
- "instances": [
- {
- "schema_version": 0,
- "attributes": {
- "allocation_ids": [
- "218d0f2c-ecf3-30bd-edc0-eaa3a4d8b384",
- "18869dde-8c4e-3aa0-1baa-ddcb5c6dc5bc"
- ],
- "datacenters": [
- "yul1"
- ],
- "deployment_id": "9bd732cd-ec36-7f96-1b82-24e60b51c048",
- "deployment_status": "successful",
- "deregister_on_destroy": true,
- "deregister_on_id_change": true,
- "detach": false,
- "id": "prod-grafana",
- "jobspec": "job \"prod-grafana\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"yul1\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 1\n\n health_check = \"checks\"\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 1\n\n # Specifies if the job should auto-promote to the canary version when all\n # canaries become healthy during a deployment. Defaults to false which means\n # canaries must be manually updated with the nomad deployment promote\n # command.\n auto_promote = true\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = true\n\n }\n\n # The reschedule stanza specifies the group's rescheduling strategy. If\n # specified at the job level, the configuration will apply to all groups\n # within the job. If the reschedule stanza is present on both the job and the\n # group, they are merged with the group stanza taking the highest precedence\n # and then the job.\n reschedule {\n delay = \"30s\"\n delay_function = \"constant\"\n unlimited = true\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group\n #\n group \"prod-group1-grafana\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 1\n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The constraint allows restricting the set of eligible nodes. Constraints\n # may filter on attributes or client metadata.\n #\n # For more information and examples on the \"volume\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/constraint\n #\n constraint {\n attribute = \"${attr.cpu.arch}\"\n operator = \"!=\"\n value = \"arm64\"\n }\n\n constraint {\n attribute = \"${node.class}\"\n value = \"builder\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task\n #\n task \"prod-task1-grafana\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"grafana/grafana:7.3.7\"\n dns_servers = [ \"172.17.0.1\" ]\n volumes = [\n \"secrets/prometheus.yml:/etc/grafana/provisioning/datasources/prometheus.yml\",\n \"secrets/dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml\",\n \"secrets/grafana.ini:/etc/grafana/grafana.ini\",\n \"secrets/node_exporter.json:/etc/grafana/provisioning/dashboards/node_exporter.json\",\n \"secrets/nomad.json:/etc/grafana/provisioning/dashboards/nomad.json\",\n \"secrets/consul.json:/etc/grafana/provisioning/dashboards/consul.json\",\n \"secrets/prometheus.json:/etc/grafana/provisioning/dashboards/prometheus.json\",\n \"secrets/blackbox_exporter_http.json:/etc/grafana/provisioning/dashboards/blackbox_exporter_http.json\",\n \"secrets/blackbox_exporter_icmp.json:/etc/grafana/provisioning/dashboards/blackbox_exporter_icmp.json\"\n ]\n }\n\n artifact {\n # Prometheus Node Exporter\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/node_exporter.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Nomad\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/nomad.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Consul\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/consul.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Prometheus\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/prometheus.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Prometheus Blackbox Exporter HTTP\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/blackbox_exporter_http.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Prometheus Blackbox Exporter ICMP\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/blackbox_exporter_icmp.json\"\n destination = \"secrets/\"\n }\n\n # The \"template\" stanza instructs Nomad to manage a template, such as\n # a configuration file or script. This template can optionally pull data\n # from Consul or Vault to populate runtime configuration data.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/template\n #\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/prometheus.yml\"\n data = \u003c\u003cEOH\napiVersion: 1\ndatasources:\n- name: Prometheus\n type: prometheus\n access: direct\n orgId: 1\n url: http://prometheus.service.consul:9090\n basicAuth: false\n isDefault: true\n version: 1\n editable: false\nEOH\n }\n\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/dashboards.yml\"\n data = \u003c\u003cEOH\napiVersion: 1\nproviders:\n- name: dashboards\n type: file\n disableDeletion: false\n updateIntervalSeconds: 10\n allowUiUpdates: false\n options:\n path: /etc/grafana/provisioning/dashboards\n foldersFromFilesStructure: true\nEOH\n }\n\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/grafana.ini\"\n data = \u003c\u003cEOH\napp_mode = production\n\n[metrics]\nenabled = true\n\n[server]\nprotocol = http\nhttp_port = 3000\nroot_url = http://grafana.service.consul:3000\nenable_gzip = true\n;cert_file =\n;cert_key =\n\n[security]\nadmin_user = grafanauser\nadmin_password = Grafana1234\nsecret_key = SW2YcwTIb9zpOOhoPsMm\n\n[users]\nallow_sign_up = false\nallow_org_create = false\nauto_assign_org = true\nauto_assign_org_role = Viewer\ndefault_theme = dark\n\n[auth.basic]\nenabled = true\n\n[auth]\ndisable_login_form = false\ndisable_signout_menu = false\n\n[auth.anonymous]\nenabled = false\n\n[log]\nmode = console\nlevel = info\n\n[log.console]\nlevel = info\nformat = console\nEOH\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/service\n #\n service {\n name = \"grafana\"\n port = \"grafana\"\n tags = [ \"grafana${NOMAD_ALLOC_INDEX}\" ]\n check {\n name = \"Grafana Check Live\"\n type = \"http\"\n protocol = \"http\"\n tls_skip_verify = true\n path = \"/api/health\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources\n #\n resources {\n cpu = 1000\n memory = 2048\n # The network stanza specifies the networking requirements for the task\n # group, including the network mode and port allocations. When scheduling\n # jobs in Nomad they are provisioned across your fleet of machines along\n # with other jobs and services. Because you don't know in advance what host\n # your job will be provisioned on, Nomad will provide your tasks with\n # network configuration when they start up.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/network\n #\n network {\n port \"grafana\" {\n static = 3000\n }\n }\n }\n }\n }\n}",
- "json": null,
- "modify_index": "9138744",
- "name": "prod-grafana",
- "namespace": "default",
- "policy_override": null,
- "purge_on_destroy": null,
- "region": "global",
- "task_groups": [
- {
- "count": 1,
- "meta": {},
- "name": "prod-group1-grafana",
- "task": [
- {
- "driver": "docker",
- "meta": {},
- "name": "prod-task1-grafana",
- "volume_mounts": []
- }
- ],
- "volumes": []
- }
- ],
- "type": "service"
- },
- "sensitive_attributes": [],
- "private": "bnVsbA==",
- "dependencies": [
- "module.grafana.data.template_file.nomad_job_grafana"
- ]
- }
- ]
- },
- {
- "module": "module.minio",
- "mode": "data",
- "type": "template_file",
- "name": "nomad_job_mc",
- "provider": "provider[\"registry.terraform.io/hashicorp/template\"]",
- "instances": [
- {
- "schema_version": 0,
- "attributes": {
- "filename": null,
- "id": "e9e956124e6fed4268bec199b9d7fd7aaff506e906b53621c6dc0e3116c40f52",
- "rendered": "job \"prod-mc\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"yul1\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers.html\n #\n type = \"batch\"\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group.html\n #\n group \"prod-group1-mc\" {\n task \"prod-task1-create-buckets\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n \n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"minio/mc:RELEASE.2021-07-27T02-40-15Z\"\n entrypoint = [\n \"/bin/sh\",\n \"-c\",\n \"mc config host add LOCALMINIO http://storage.service.consul:9000 $MINIO_ACCESS_KEY $MINIO_SECRET_KEY \u0026\u0026 mc mb -p LOCALMINIO/logs.fd.io LOCALMINIO/docs.fd.io ; mc policy set public LOCALMINIO/logs.fd.io mc policy set public LOCALMINIO/docs.fd.io mc ilm add --expiry-days '180' LOCALMINIO/logs.fd.io mc admin user add LOCALMINIO storage Storage1234 mc admin policy set LOCALMINIO writeonly user=storage\"\n ]\n dns_servers = [ \"${attr.unique.network.ip-address}\" ]\n privileged = false\n }\n\n # The env stanza configures a list of environment variables to populate\n # the task's environment before starting.\n env {\n \n MINIO_ACCESS_KEY = \"minio\"\n MINIO_SECRET_KEY = \"minio123\"\n \n \n }\n }\n }\n}\n",
- "template": "job \"${job_name}\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"${datacenters}\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers.html\n #\n type = \"batch\"\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group.html\n #\n group \"prod-group1-mc\" {\n task \"prod-task1-create-buckets\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n %{ if use_vault_provider }\n vault {\n policies = \"${vault_kv_policy_name}\"\n }\n %{ endif }\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"${image}\"\n entrypoint = [\n \"/bin/sh\",\n \"-c\",\n \"${command}\"\n ]\n dns_servers = [ \"$${attr.unique.network.ip-address}\" ]\n privileged = false\n }\n\n # The env stanza configures a list of environment variables to populate\n # the task's environment before starting.\n env {\n %{ if use_vault_provider }\n {{ with secret \"${vault_kv_path}\" }}\n MINIO_ACCESS_KEY = \"{{ .Data.data.${vault_kv_field_access_key} }}\"\n MINIO_SECRET_KEY = \"{{ .Data.data.${vault_kv_field_secret_key} }}\"\n {{ end }}\n %{ else }\n MINIO_ACCESS_KEY = \"${access_key}\"\n MINIO_SECRET_KEY = \"${secret_key}\"\n %{ endif }\n ${ envs }\n }\n }\n }\n}\n",
- "vars": {
- "access_key": "minio",
- "command": "mc config host add LOCALMINIO http://storage.service.consul:9000 $MINIO_ACCESS_KEY $MINIO_SECRET_KEY \u0026\u0026 mc mb -p LOCALMINIO/logs.fd.io LOCALMINIO/docs.fd.io ; mc policy set public LOCALMINIO/logs.fd.io mc policy set public LOCALMINIO/docs.fd.io mc ilm add --expiry-days '180' LOCALMINIO/logs.fd.io mc admin user add LOCALMINIO storage Storage1234 mc admin policy set LOCALMINIO writeonly user=storage",
- "datacenters": "yul1",
- "envs": "",
- "image": "minio/mc:RELEASE.2021-07-27T02-40-15Z",
- "job_name": "prod-mc",
- "minio_port": "9000",
- "minio_service_name": "storage",
- "secret_key": "minio123",
- "service_name": "mc",
- "use_vault_provider": "false"
- }
- },
- "sensitive_attributes": []
- }
- ]
- },
- {
- "module": "module.minio",
- "mode": "data",
- "type": "template_file",
- "name": "nomad_job_minio",
- "provider": "provider[\"registry.terraform.io/hashicorp/template\"]",
- "instances": [
- {
- "schema_version": 0,
- "attributes": {
- "filename": null,
- "id": "68243169c0df15ef6b56803101ed0c744a84545c34df366ee4036052c6292ef3",
- "rendered": "job \"prod-minio\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"yul1\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 1\n\n health_check = \"checks\"\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 1\n\n # Specifies if the job should auto-promote to the canary version when all\n # canaries become healthy during a deployment. Defaults to false which means\n # canaries must be manually updated with the nomad deployment promote\n # command.\n auto_promote = true\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = true\n\n }\n\n # All groups in this job should be scheduled on different hosts.\n constraint {\n operator = \"distinct_hosts\"\n value = \"true\"\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # https://www.nomadproject.io/docs/job-specification/group\n #\n group \"prod-group1-minio\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 4\n\n # https://www.nomadproject.io/docs/job-specification/volume\n \n volume \"prod-volume1-minio\" {\n type = \"host\"\n read_only = false\n source = \"prod-volume-data1-1\"\n }\n \n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # https://www.nomadproject.io/docs/job-specification/task.html\n #\n task \"prod-task1-minio\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n \n volume_mount {\n volume = \"prod-volume1-minio\"\n destination = \"/data/\"\n read_only = false\n }\n \n\n \n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"minio/minio:RELEASE.2021-07-27T02-40-15Z\"\n dns_servers = [ \"172.17.0.1\" ]\n network_mode = \"host\"\n command = \"server\"\n args = [ \"http://10.32.8.1{4...7}:9000/data/\" ]\n port_map {\n http = 9000\n }\n privileged = false\n }\n\n # The env stanza configures a list of environment variables to populate\n # the task's environment before starting.\n env {\n\n MINIO_ACCESS_KEY = \"minio\"\n MINIO_SECRET_KEY = \"minio123\"\n\n MINIO_BROWSER=\"off\"\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # https://www.nomadproject.io/docs/job-specification/service\n #\n service {\n name = \"storage\"\n port = \"http\"\n tags = [ \"storage${NOMAD_ALLOC_INDEX}\" ]\n check {\n name = \"Min.io Server HTTP Check Live\"\n type = \"http\"\n port = \"http\"\n protocol = \"http\"\n method = \"GET\"\n path = \"/minio/health/live\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n check {\n name = \"Min.io Server HTTP Check Ready\"\n type = \"http\"\n port = \"http\"\n protocol = \"http\"\n method = \"GET\"\n path = \"/minio/health/ready\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # https://www.nomadproject.io/docs/job-specification/resources\n #\n resources {\n cpu = 40000\n memory = 40000\n # The network stanza specifies the networking requirements for the task\n # group, including the network mode and port allocations. When scheduling\n # jobs in Nomad they are provisioned across your fleet of machines along\n # with other jobs and services. Because you don't know in advance what host\n # your job will be provisioned on, Nomad will provide your tasks with\n # network configuration when they start up.\n #\n # https://www.nomadproject.io/docs/job-specification/network\n #\n network {\n port \"http\" {\n static = 9000\n }\n }\n }\n }\n }\n}\n",
- "template": "job \"${job_name}\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"${datacenters}\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 1\n\n health_check = \"checks\"\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n%{ if use_canary }\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 1\n\n # Specifies if the job should auto-promote to the canary version when all\n # canaries become healthy during a deployment. Defaults to false which means\n # canaries must be manually updated with the nomad deployment promote\n # command.\n auto_promote = true\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = true\n%{ endif }\n }\n\n # All groups in this job should be scheduled on different hosts.\n constraint {\n operator = \"distinct_hosts\"\n value = \"true\"\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # https://www.nomadproject.io/docs/job-specification/group\n #\n group \"prod-group1-minio\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = ${group_count}\n\n # https://www.nomadproject.io/docs/job-specification/volume\n %{ if use_host_volume }\n volume \"prod-volume1-minio\" {\n type = \"host\"\n read_only = false\n source = \"${host_volume}\"\n }\n %{ endif }\n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # https://www.nomadproject.io/docs/job-specification/task.html\n #\n task \"prod-task1-minio\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n %{ if use_host_volume }\n volume_mount {\n volume = \"prod-volume1-minio\"\n destination = \"${data_dir}\"\n read_only = false\n }\n %{ endif }\n\n %{ if use_vault_provider }\n vault {\n policies = \"${vault_kv_policy_name}\"\n }\n %{ endif }\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"${image}\"\n dns_servers = [ \"172.17.0.1\" ]\n network_mode = \"host\"\n command = \"server\"\n args = [ \"${host}:${port}${data_dir}\" ]\n port_map {\n http = ${port}\n }\n privileged = false\n }\n\n # The env stanza configures a list of environment variables to populate\n # the task's environment before starting.\n env {\n%{ if use_vault_provider }\n{{ with secret \"${vault_kv_path}\" }}\n MINIO_ACCESS_KEY = \"{{ .Data.data.${vault_kv_field_access_key} }}\"\n MINIO_SECRET_KEY = \"{{ .Data.data.${vault_kv_field_secret_key} }}\"\n{{ end }}\n%{ else }\n MINIO_ACCESS_KEY = \"${access_key}\"\n MINIO_SECRET_KEY = \"${secret_key}\"\n%{ endif }\n ${ envs }\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # https://www.nomadproject.io/docs/job-specification/service\n #\n service {\n name = \"${service_name}\"\n port = \"http\"\n tags = [ \"storage$${NOMAD_ALLOC_INDEX}\" ]\n check {\n name = \"Min.io Server HTTP Check Live\"\n type = \"http\"\n port = \"http\"\n protocol = \"http\"\n method = \"GET\"\n path = \"/minio/health/live\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n check {\n name = \"Min.io Server HTTP Check Ready\"\n type = \"http\"\n port = \"http\"\n protocol = \"http\"\n method = \"GET\"\n path = \"/minio/health/ready\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # https://www.nomadproject.io/docs/job-specification/resources\n #\n resources {\n cpu = ${cpu}\n memory = ${memory}\n # The network stanza specifies the networking requirements for the task\n # group, including the network mode and port allocations. When scheduling\n # jobs in Nomad they are provisioned across your fleet of machines along\n # with other jobs and services. Because you don't know in advance what host\n # your job will be provisioned on, Nomad will provide your tasks with\n # network configuration when they start up.\n #\n # https://www.nomadproject.io/docs/job-specification/network\n #\n network {\n port \"http\" {\n static = ${port}\n }\n }\n }\n }\n }\n}\n",
- "vars": {
- "access_key": "minio",
- "cpu": "40000",
- "cpu_proxy": "200",
- "data_dir": "/data/",
- "datacenters": "yul1",
- "envs": "MINIO_BROWSER=\"off\"",
- "group_count": "4",
- "host": "http://10.32.8.1{4...7}",
- "host_volume": "prod-volume-data1-1",
- "image": "minio/minio:RELEASE.2021-07-27T02-40-15Z",
- "job_name": "prod-minio",
- "memory": "40000",
- "memory_proxy": "128",
- "port": "9000",
- "secret_key": "minio123",
- "service_name": "storage",
- "upstreams": "[]",
- "use_canary": "true",
- "use_host_volume": "true",
- "use_vault_provider": "false"
- }
- },
- "sensitive_attributes": []
- }
- ]
- },
- {
- "module": "module.minio",
- "mode": "managed",
- "type": "nomad_job",
- "name": "nomad_job_minio",
- "provider": "provider[\"registry.terraform.io/hashicorp/nomad\"].yul1",
- "instances": [
- {
- "schema_version": 0,
- "attributes": {
- "allocation_ids": [
- "924ac00a-b010-4af9-712d-1958c43c5363",
- "1f464e9b-5892-45d8-9f6c-538ff61ac976",
- "8b00df36-88c8-9cad-ce94-a2827b815be4",
- "d257257b-4ed7-1a72-8ad5-7dc078e371b3"
- ],
- "datacenters": [
- "yul1"
- ],
- "deployment_id": "33d8013c-7c7c-6828-a204-58d9286b61ef",
- "deployment_status": "successful",
- "deregister_on_destroy": true,
- "deregister_on_id_change": true,
- "detach": false,
- "id": "prod-minio",
- "jobspec": "job \"prod-minio\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"yul1\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 1\n\n health_check = \"checks\"\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 1\n\n # Specifies if the job should auto-promote to the canary version when all\n # canaries become healthy during a deployment. Defaults to false which means\n # canaries must be manually updated with the nomad deployment promote\n # command.\n auto_promote = true\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = true\n\n }\n\n # All groups in this job should be scheduled on different hosts.\n constraint {\n operator = \"distinct_hosts\"\n value = \"true\"\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # https://www.nomadproject.io/docs/job-specification/group\n #\n group \"prod-group1-minio\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 4\n\n # https://www.nomadproject.io/docs/job-specification/volume\n \n volume \"prod-volume1-minio\" {\n type = \"host\"\n read_only = false\n source = \"prod-volume-data1-1\"\n }\n \n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # https://www.nomadproject.io/docs/job-specification/task.html\n #\n task \"prod-task1-minio\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n \n volume_mount {\n volume = \"prod-volume1-minio\"\n destination = \"/data/\"\n read_only = false\n }\n \n\n \n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"minio/minio:RELEASE.2021-07-27T02-40-15Z\"\n dns_servers = [ \"172.17.0.1\" ]\n network_mode = \"host\"\n command = \"server\"\n args = [ \"http://10.32.8.1{4...7}:9000/data/\" ]\n port_map {\n http = 9000\n }\n privileged = false\n }\n\n # The env stanza configures a list of environment variables to populate\n # the task's environment before starting.\n env {\n\n MINIO_ACCESS_KEY = \"minio\"\n MINIO_SECRET_KEY = \"minio123\"\n\n MINIO_BROWSER=\"off\"\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # https://www.nomadproject.io/docs/job-specification/service\n #\n service {\n name = \"storage\"\n port = \"http\"\n tags = [ \"storage${NOMAD_ALLOC_INDEX}\" ]\n check {\n name = \"Min.io Server HTTP Check Live\"\n type = \"http\"\n port = \"http\"\n protocol = \"http\"\n method = \"GET\"\n path = \"/minio/health/live\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n check {\n name = \"Min.io Server HTTP Check Ready\"\n type = \"http\"\n port = \"http\"\n protocol = \"http\"\n method = \"GET\"\n path = \"/minio/health/ready\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # https://www.nomadproject.io/docs/job-specification/resources\n #\n resources {\n cpu = 40000\n memory = 40000\n # The network stanza specifies the networking requirements for the task\n # group, including the network mode and port allocations. When scheduling\n # jobs in Nomad they are provisioned across your fleet of machines along\n # with other jobs and services. Because you don't know in advance what host\n # your job will be provisioned on, Nomad will provide your tasks with\n # network configuration when they start up.\n #\n # https://www.nomadproject.io/docs/job-specification/network\n #\n network {\n port \"http\" {\n static = 9000\n }\n }\n }\n }\n }\n}\n",
- "json": null,
- "modify_index": "8949013",
- "name": "prod-minio",
- "namespace": "default",
- "policy_override": null,
- "purge_on_destroy": null,
- "region": "global",
- "task_groups": [
- {
- "count": 4,
- "meta": {},
- "name": "prod-group1-minio",
- "task": [
- {
- "driver": "docker",
- "meta": {},
- "name": "prod-task1-minio",
- "volume_mounts": [
- {
- "destination": "/data/",
- "read_only": false,
- "volume": "prod-volume1-minio"
- }
- ]
- }
- ],
- "volumes": [
- {
- "name": "prod-volume1-minio",
- "read_only": false,
- "source": "prod-volume-data1-1",
- "type": "host"
- }
- ]
- }
- ],
- "type": "service"
- },
- "sensitive_attributes": [],
- "private": "bnVsbA==",
- "dependencies": [
- "module.minio.data.template_file.nomad_job_minio"
- ]
- }
- ]
- },
- {
- "module": "module.nginx",
- "mode": "data",
- "type": "template_file",
- "name": "nomad_job_nginx",
- "provider": "provider[\"registry.terraform.io/hashicorp/template\"]",
- "instances": [
- {
- "schema_version": 0,
- "attributes": {
- "filename": null,
- "id": "6406c7bfb6f177869e124475f94fada7f69c2e6631c6943a6a00a167fd19a925",
- "rendered": "job \"prod-nginx\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"yul1\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers.html\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 0\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = false\n\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 0\n }\n\n # The reschedule stanza specifies the group's rescheduling strategy. If\n # specified at the job level, the configuration will apply to all groups\n # within the job. If the reschedule stanza is present on both the job and the\n # group, they are merged with the group stanza taking the highest precedence\n # and then the job.\n reschedule {\n delay = \"30s\"\n delay_function = \"constant\"\n unlimited = true\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group.html\n #\n group \"prod-group1-nginx\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 1\n\n # https://www.nomadproject.io/docs/job-specification/volume\n \n volume \"prod-volume1-nginx\" {\n type = \"host\"\n read_only = false\n source = \"prod-volume-data1-1\"\n }\n \n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The constraint allows restricting the set of eligible nodes. Constraints\n # may filter on attributes or client metadata.\n #\n # For more information and examples on the \"volume\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/constraint\n #\n constraint {\n attribute = \"${attr.cpu.arch}\"\n operator = \"!=\"\n value = \"arm64\"\n }\n\n constraint {\n attribute = \"${node.class}\"\n value = \"builder\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task.html\n #\n task \"prod-task1-nginx\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"nginx:stable\"\n port_map {\n https = 443\n }\n privileged = false\n volumes = [\n \"/etc/ssl/certs/logs.nginx.service.consul.crt:/etc/ssl/certs/logs.nginx.service.consul.crt\",\n \"/etc/ssl/private/logs.nginx.service.consul.key:/etc/ssl/private/logs.nginx.service.consul.key\",\n \"custom/upstream.conf:/etc/nginx/conf.d/upstream.conf\",\n \"custom/server_logs.conf:/etc/nginx/conf.d/server_logs.conf\"\n ]\n }\n\n # The \"template\" stanza instructs Nomad to manage a template, such as\n # a configuration file or script. This template can optionally pull data\n # from Consul or Vault to populate runtime configuration data.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/template.html\n #\n template {\n data = \u003c\u003cEOH\n upstream storage {\n {{ range service \"storage\" }}\n server {{ .Address }}:{{ .Port }};\n {{ end }}\n }\n EOH\n destination = \"custom/upstream.conf\"\n }\n template {\n data = \u003c\u003cEOH\n server {\n listen 443 ssl default_server;\n server_name logs.nginx.service.consul;\n\n ssl_certificate /etc/ssl/certs/logs.nginx.service.consul.crt;\n ssl_certificate_key /etc/ssl/private/logs.nginx.service.consul.key;\n ssl_protocols TLSv1.2;\n ssl_prefer_server_ciphers on;\n ssl_ciphers \"ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES256-SHA384:ECDHE-RSA-AES256-SHA384\";\n ssl_session_timeout 10m;\n ssl_session_cache shared:SSL:10m;\n ssl_session_tickets off;\n ssl_stapling on;\n ssl_stapling_verify on;\n\n fastcgi_hide_header X-Powered-By;\n\n client_max_body_size 0;\n client_header_timeout 60;\n client_body_timeout 86400;\n fastcgi_read_timeout 86400;\n proxy_connect_timeout 60;\n proxy_read_timeout 86400;\n proxy_send_timeout 86400;\n send_timeout 86400;\n\n keepalive_timeout 70;\n location / {\n chunked_transfer_encoding off;\n proxy_connect_timeout 300;\n proxy_http_version 1.1;\n proxy_set_header Host $host:$server_port;\n proxy_set_header Connection \"\";\n proxy_pass http://storage/logs.fd.io/;\n server_name_in_redirect off;\n }\n location ~ (.*html.gz)$ {\n add_header Content-Encoding gzip;\n add_header Content-Type text/html;\n chunked_transfer_encoding off;\n proxy_connect_timeout 300;\n proxy_http_version 1.1;\n proxy_set_header Host $host:$server_port;\n proxy_set_header Connection \"\";\n proxy_pass http://storage/logs.fd.io/$1;\n server_name_in_redirect off;\n }\n location ~ (.*txt.gz|.*log.gz)$ {\n add_header Content-Encoding gzip;\n add_header Content-Type text/plain;\n chunked_transfer_encoding off;\n proxy_connect_timeout 300;\n proxy_http_version 1.1;\n proxy_set_header Host $host:$server_port;\n proxy_set_header Connection \"\";\n proxy_pass http://storage/logs.fd.io/$1;\n server_name_in_redirect off;\n }\n location ~ (.*xml.gz)$ {\n add_header Content-Encoding gzip;\n add_header Content-Type application/xml;\n chunked_transfer_encoding off;\n proxy_connect_timeout 300;\n proxy_http_version 1.1;\n proxy_set_header Host $host:$server_port;\n proxy_set_header Connection \"\";\n proxy_pass http://storage/logs.fd.io/$1;\n server_name_in_redirect off;\n }\n }\n EOH\n destination = \"custom/logs.conf\"\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/service.html\n #\n service {\n name = \"nginx\"\n port = \"https\"\n tags = [ \"logs\" ]\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources.html\n #\n resources {\n cpu = 2000\n memory = 4096\n network {\n mode = \"bridge\"\n port \"https\" {\n static = 443\n }\n }\n }\n }\n }\n}",
- "template": "job \"${job_name}\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"${datacenters}\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers.html\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 0\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = false\n\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 0\n }\n\n # The reschedule stanza specifies the group's rescheduling strategy. If\n # specified at the job level, the configuration will apply to all groups\n # within the job. If the reschedule stanza is present on both the job and the\n # group, they are merged with the group stanza taking the highest precedence\n # and then the job.\n reschedule {\n delay = \"30s\"\n delay_function = \"constant\"\n unlimited = true\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group.html\n #\n group \"prod-group1-nginx\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 1\n\n # https://www.nomadproject.io/docs/job-specification/volume\n %{ if use_host_volume }\n volume \"prod-volume1-nginx\" {\n type = \"host\"\n read_only = false\n source = \"${host_volume}\"\n }\n %{ endif }\n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The constraint allows restricting the set of eligible nodes. Constraints\n # may filter on attributes or client metadata.\n #\n # For more information and examples on the \"volume\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/constraint\n #\n constraint {\n attribute = \"$${attr.cpu.arch}\"\n operator = \"!=\"\n value = \"arm64\"\n }\n\n constraint {\n attribute = \"$${node.class}\"\n value = \"builder\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task.html\n #\n task \"prod-task1-nginx\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"nginx:stable\"\n port_map {\n https = 443\n }\n privileged = false\n volumes = [\n \"/etc/ssl/certs/logs.nginx.service.consul.crt:/etc/ssl/certs/logs.nginx.service.consul.crt\",\n \"/etc/ssl/private/logs.nginx.service.consul.key:/etc/ssl/private/logs.nginx.service.consul.key\",\n \"custom/upstream.conf:/etc/nginx/conf.d/upstream.conf\",\n \"custom/server_logs.conf:/etc/nginx/conf.d/server_logs.conf\"\n ]\n }\n\n # The \"template\" stanza instructs Nomad to manage a template, such as\n # a configuration file or script. This template can optionally pull data\n # from Consul or Vault to populate runtime configuration data.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/template.html\n #\n template {\n data = \u003c\u003cEOH\n upstream storage {\n {{ range service \"storage\" }}\n server {{ .Address }}:{{ .Port }};\n {{ end }}\n }\n EOH\n destination = \"custom/upstream.conf\"\n }\n template {\n data = \u003c\u003cEOH\n server {\n listen 443 ssl default_server;\n server_name logs.nginx.service.consul;\n\n ssl_certificate /etc/ssl/certs/logs.nginx.service.consul.crt;\n ssl_certificate_key /etc/ssl/private/logs.nginx.service.consul.key;\n ssl_protocols TLSv1.2;\n ssl_prefer_server_ciphers on;\n ssl_ciphers \"ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES256-SHA384:ECDHE-RSA-AES256-SHA384\";\n ssl_session_timeout 10m;\n ssl_session_cache shared:SSL:10m;\n ssl_session_tickets off;\n ssl_stapling on;\n ssl_stapling_verify on;\n\n fastcgi_hide_header X-Powered-By;\n\n client_max_body_size 0;\n client_header_timeout 60;\n client_body_timeout 86400;\n fastcgi_read_timeout 86400;\n proxy_connect_timeout 60;\n proxy_read_timeout 86400;\n proxy_send_timeout 86400;\n send_timeout 86400;\n\n keepalive_timeout 70;\n location / {\n chunked_transfer_encoding off;\n proxy_connect_timeout 300;\n proxy_http_version 1.1;\n proxy_set_header Host $host:$server_port;\n proxy_set_header Connection \"\";\n proxy_pass http://storage/logs.fd.io/;\n server_name_in_redirect off;\n }\n location ~ (.*html.gz)$ {\n add_header Content-Encoding gzip;\n add_header Content-Type text/html;\n chunked_transfer_encoding off;\n proxy_connect_timeout 300;\n proxy_http_version 1.1;\n proxy_set_header Host $host:$server_port;\n proxy_set_header Connection \"\";\n proxy_pass http://storage/logs.fd.io/$1;\n server_name_in_redirect off;\n }\n location ~ (.*txt.gz|.*log.gz)$ {\n add_header Content-Encoding gzip;\n add_header Content-Type text/plain;\n chunked_transfer_encoding off;\n proxy_connect_timeout 300;\n proxy_http_version 1.1;\n proxy_set_header Host $host:$server_port;\n proxy_set_header Connection \"\";\n proxy_pass http://storage/logs.fd.io/$1;\n server_name_in_redirect off;\n }\n location ~ (.*xml.gz)$ {\n add_header Content-Encoding gzip;\n add_header Content-Type application/xml;\n chunked_transfer_encoding off;\n proxy_connect_timeout 300;\n proxy_http_version 1.1;\n proxy_set_header Host $host:$server_port;\n proxy_set_header Connection \"\";\n proxy_pass http://storage/logs.fd.io/$1;\n server_name_in_redirect off;\n }\n }\n EOH\n destination = \"custom/logs.conf\"\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/service.html\n #\n service {\n name = \"nginx\"\n port = \"https\"\n tags = [ \"logs\" ]\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources.html\n #\n resources {\n cpu = 2000\n memory = 4096\n network {\n mode = \"bridge\"\n port \"https\" {\n static = 443\n }\n }\n }\n }\n }\n}",
- "vars": {
- "datacenters": "yul1",
- "host_volume": "prod-volume-data1-1",
- "job_name": "prod-nginx",
- "use_host_volume": "true"
- }
- },
- "sensitive_attributes": []
- }
- ]
- },
- {
- "module": "module.nginx",
- "mode": "managed",
- "type": "nomad_job",
- "name": "nomad_job_nginx",
- "provider": "provider[\"registry.terraform.io/hashicorp/nomad\"].yul1",
- "instances": [
- {
- "schema_version": 0,
- "attributes": {
- "allocation_ids": [
- "5f0b5208-cb84-507d-18d0-d294925c13e0",
- "9a5e3312-8802-e842-1eb0-0cc01a4d4575"
- ],
- "datacenters": [
- "yul1"
- ],
- "deployment_id": "",
- "deployment_status": "",
- "deregister_on_destroy": true,
- "deregister_on_id_change": true,
- "detach": false,
- "id": "prod-nginx",
- "jobspec": "job \"prod-nginx\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"yul1\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers.html\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 0\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = false\n\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 0\n }\n\n # The reschedule stanza specifies the group's rescheduling strategy. If\n # specified at the job level, the configuration will apply to all groups\n # within the job. If the reschedule stanza is present on both the job and the\n # group, they are merged with the group stanza taking the highest precedence\n # and then the job.\n reschedule {\n delay = \"30s\"\n delay_function = \"constant\"\n unlimited = true\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group.html\n #\n group \"prod-group1-nginx\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 1\n\n # https://www.nomadproject.io/docs/job-specification/volume\n \n volume \"prod-volume1-nginx\" {\n type = \"host\"\n read_only = false\n source = \"prod-volume-data1-1\"\n }\n \n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The constraint allows restricting the set of eligible nodes. Constraints\n # may filter on attributes or client metadata.\n #\n # For more information and examples on the \"volume\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/constraint\n #\n constraint {\n attribute = \"${attr.cpu.arch}\"\n operator = \"!=\"\n value = \"arm64\"\n }\n\n constraint {\n attribute = \"${node.class}\"\n value = \"builder\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task.html\n #\n task \"prod-task1-nginx\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"nginx:stable\"\n port_map {\n https = 443\n }\n privileged = false\n volumes = [\n \"/etc/ssl/certs/logs.nginx.service.consul.crt:/etc/ssl/certs/logs.nginx.service.consul.crt\",\n \"/etc/ssl/private/logs.nginx.service.consul.key:/etc/ssl/private/logs.nginx.service.consul.key\",\n \"custom/upstream.conf:/etc/nginx/conf.d/upstream.conf\",\n \"custom/server_logs.conf:/etc/nginx/conf.d/server_logs.conf\"\n ]\n }\n\n # The \"template\" stanza instructs Nomad to manage a template, such as\n # a configuration file or script. This template can optionally pull data\n # from Consul or Vault to populate runtime configuration data.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/template.html\n #\n template {\n data = \u003c\u003cEOH\n upstream storage {\n {{ range service \"storage\" }}\n server {{ .Address }}:{{ .Port }};\n {{ end }}\n }\n EOH\n destination = \"custom/upstream.conf\"\n }\n template {\n data = \u003c\u003cEOH\n server {\n listen 443 ssl default_server;\n server_name logs.nginx.service.consul;\n\n ssl_certificate /etc/ssl/certs/logs.nginx.service.consul.crt;\n ssl_certificate_key /etc/ssl/private/logs.nginx.service.consul.key;\n ssl_protocols TLSv1.2;\n ssl_prefer_server_ciphers on;\n ssl_ciphers \"ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES256-SHA384:ECDHE-RSA-AES256-SHA384\";\n ssl_session_timeout 10m;\n ssl_session_cache shared:SSL:10m;\n ssl_session_tickets off;\n ssl_stapling on;\n ssl_stapling_verify on;\n\n fastcgi_hide_header X-Powered-By;\n\n client_max_body_size 0;\n client_header_timeout 60;\n client_body_timeout 86400;\n fastcgi_read_timeout 86400;\n proxy_connect_timeout 60;\n proxy_read_timeout 86400;\n proxy_send_timeout 86400;\n send_timeout 86400;\n\n keepalive_timeout 70;\n location / {\n chunked_transfer_encoding off;\n proxy_connect_timeout 300;\n proxy_http_version 1.1;\n proxy_set_header Host $host:$server_port;\n proxy_set_header Connection \"\";\n proxy_pass http://storage/logs.fd.io/;\n server_name_in_redirect off;\n }\n location ~ (.*html.gz)$ {\n add_header Content-Encoding gzip;\n add_header Content-Type text/html;\n chunked_transfer_encoding off;\n proxy_connect_timeout 300;\n proxy_http_version 1.1;\n proxy_set_header Host $host:$server_port;\n proxy_set_header Connection \"\";\n proxy_pass http://storage/logs.fd.io/$1;\n server_name_in_redirect off;\n }\n location ~ (.*txt.gz|.*log.gz)$ {\n add_header Content-Encoding gzip;\n add_header Content-Type text/plain;\n chunked_transfer_encoding off;\n proxy_connect_timeout 300;\n proxy_http_version 1.1;\n proxy_set_header Host $host:$server_port;\n proxy_set_header Connection \"\";\n proxy_pass http://storage/logs.fd.io/$1;\n server_name_in_redirect off;\n }\n location ~ (.*xml.gz)$ {\n add_header Content-Encoding gzip;\n add_header Content-Type application/xml;\n chunked_transfer_encoding off;\n proxy_connect_timeout 300;\n proxy_http_version 1.1;\n proxy_set_header Host $host:$server_port;\n proxy_set_header Connection \"\";\n proxy_pass http://storage/logs.fd.io/$1;\n server_name_in_redirect off;\n }\n }\n EOH\n destination = \"custom/logs.conf\"\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/service.html\n #\n service {\n name = \"nginx\"\n port = \"https\"\n tags = [ \"logs\" ]\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources.html\n #\n resources {\n cpu = 2000\n memory = 4096\n network {\n mode = \"bridge\"\n port \"https\" {\n static = 443\n }\n }\n }\n }\n }\n}",
- "json": null,
- "modify_index": "9138746",
- "name": "prod-nginx",
- "namespace": "default",
- "policy_override": null,
- "purge_on_destroy": null,
- "region": "global",
- "task_groups": [
- {
- "count": 1,
- "meta": {},
- "name": "prod-group1-nginx",
- "task": [
- {
- "driver": "docker",
- "meta": {},
- "name": "prod-task1-nginx",
- "volume_mounts": []
- }
- ],
- "volumes": [
- {
- "name": "prod-volume1-nginx",
- "read_only": false,
- "source": "prod-volume-data1-1",
- "type": "host"
- }
- ]
- }
- ],
- "type": "service"
- },
- "sensitive_attributes": [],
- "private": "bnVsbA==",
- "dependencies": [
- "module.nginx.data.template_file.nomad_job_nginx"
- ]
- }
- ]
- },
- {
- "module": "module.prometheus",
- "mode": "data",
- "type": "template_file",
- "name": "nomad_job_prometheus",
- "provider": "provider[\"registry.terraform.io/hashicorp/template\"]",
- "instances": [
- {
- "schema_version": 0,
- "attributes": {
- "filename": null,
- "id": "2cfc207e58bdcd266ac342e51f9e5a442c595ddad94d04462ca1ad78ea24cf61",
- "rendered": "job \"prod-prometheus\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"yul1\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 1\n\n health_check = \"checks\"\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 1\n\n # Specifies if the job should auto-promote to the canary version when all\n # canaries become healthy during a deployment. Defaults to false which means\n # canaries must be manually updated with the nomad deployment promote\n # command.\n auto_promote = true\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = true\n\n }\n\n # The reschedule stanza specifies the group's rescheduling strategy. If\n # specified at the job level, the configuration will apply to all groups\n # within the job. If the reschedule stanza is present on both the job and the\n # group, they are merged with the group stanza taking the highest precedence\n # and then the job.\n reschedule {\n delay = \"30s\"\n delay_function = \"constant\"\n unlimited = true\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group\n #\n group \"prod-group1-prometheus\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 4\n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The volume stanza allows the group to specify that it requires a given\n # volume from the cluster.\n #\n # For more information and examples on the \"volume\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/volume\n #\n \n volume \"prod-volume1-prometheus\" {\n type = \"host\"\n read_only = false\n source = \"prod-volume-data1-1\"\n }\n \n\n # The constraint allows restricting the set of eligible nodes. Constraints\n # may filter on attributes or client metadata.\n #\n # For more information and examples on the \"volume\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/constraint\n #\n constraint {\n attribute = \"${attr.cpu.arch}\"\n operator = \"!=\"\n value = \"arm64\"\n }\n\n constraint {\n attribute = \"${node.class}\"\n value = \"builder\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task\n #\n task \"prod-task1-prometheus\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"exec\"\n\n \n volume_mount {\n volume = \"prod-volume1-prometheus\"\n destination = \"/data/\"\n read_only = false\n }\n \n\n \n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n command = \"local/prometheus-2.28.1.linux-amd64/prometheus\"\n args = [\n \"--config.file=secrets/prometheus.yml\",\n \"--storage.tsdb.path=/data/prometheus/\",\n \"--storage.tsdb.retention.time=7d\"\n ]\n }\n\n # The artifact stanza instructs Nomad to fetch and unpack a remote resource,\n # such as a file, tarball, or binary. Nomad downloads artifacts using the\n # popular go-getter library, which permits downloading artifacts from a\n # variety of locations using a URL as the input source.\n #\n # For more information and examples on the \"artifact\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/artifact\n #\n artifact {\n source = \"https://github.com/prometheus/prometheus/releases/download/v2.28.1/prometheus-2.28.1.linux-amd64.tar.gz\"\n }\n\n # The \"template\" stanza instructs Nomad to manage a template, such as\n # a configuration file or script. This template can optionally pull data\n # from Consul or Vault to populate runtime configuration data.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/template\n #\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/alerts.yml\"\n left_delimiter = \"{{{\"\n right_delimiter = \"}}}\"\n data = \u003c\u003cEOH\n---\ngroups:\n- name: \"Jenkins Job Health Exporter\"\n rules:\n - alert: JenkinsJobHealthExporterFailures\n expr: jenkins_job_failure{id=~\".*\"} \u003e jenkins_job_success{id=~\".*\"}\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Jenkins Job Health detected high failure rate on jenkins jobs.\"\n description: \"Job: {{ $labels.id }}\"\n - alert: JenkinsJobHealthExporterUnstable\n expr: jenkins_job_unstable{id=~\".*\"} \u003e jenkins_job_success{id=~\".*\"}\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Jenkins Job Health detected high unstable rate on jenkins jobs.\"\n description: \"Job: {{ $labels.id }}\"\n- name: \"Consul\"\n rules:\n - alert: ConsulServiceHealthcheckFailed\n expr: consul_catalog_service_node_healthy == 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Consul service healthcheck failed (instance {{ $labels.instance }}).\"\n description: \"Service: `{{ $labels.service_name }}` Healthcheck: `{{ $labels.service_id }}`.\"\n - alert: ConsulMissingMasterNode\n expr: consul_raft_peers \u003c 3\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Consul missing master node (instance {{ $labels.instance }}).\"\n description: \"Numbers of consul raft peers should be 3, in order to preserve quorum.\"\n - alert: ConsulAgentUnhealthy\n expr: consul_health_node_status{status=\"critical\"} == 1\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Consul agent unhealthy (instance {{ $labels.instance }}).\"\n description: \"A Consul agent is down.\"\n- name: \"Hosts\"\n rules:\n - alert: NodeDown\n expr: up == 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus target missing (instance {{ $labels.instance }}).\"\n description: \"A Prometheus target has disappeared. An exporter might be crashed.\"\n - alert: HostOutOfMemory\n expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 \u003c 10\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Host out of memory (instance {{ $labels.instance }}).\"\n description: \"Node memory is filling up (\u003c 10% left).\"\n - alert: HostOomKillDetected\n expr: increase(node_vmstat_oom_kill[1m]) \u003e 0\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Host OOM kill detected (instance {{ $labels.instance }}).\"\n description: \"OOM kill detected.\"\n - alert: HostMemoryUnderMemoryPressure\n expr: rate(node_vmstat_pgmajfault[1m]) \u003e 1000\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Host memory under memory pressure (instance {{ $labels.instance }}).\"\n description: \"The node is under heavy memory pressure. High rate of major page faults.\"\n - alert: HostOutOfDiskSpace\n expr: (node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes \u003c 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Host out of disk space (instance {{ $labels.instance }}).\"\n description: \"Disk is almost full (\u003c 10% left).\"\n - alert: HostRaidDiskFailure\n expr: node_md_disks{state=\"failed\"} \u003e 0\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Host RAID disk failure (instance {{ $labels.instance }}).\"\n description: \"At least one device in RAID array on {{ $labels.instance }} failed. Array {{ $labels.md_device }} needs attention and possibly a disk swap.\"\n - alert: HostConntrackLimit\n expr: node_nf_conntrack_entries / node_nf_conntrack_entries_limit \u003e 0.8\n for: 5m\n labels:\n severity: warning\n annotations:\n summary: \"Host conntrack limit (instance {{ $labels.instance }}).\"\n description: \"The number of conntrack is approching limit.\"\n - alert: HostNetworkInterfaceSaturated\n expr: (rate(node_network_receive_bytes_total{device!~\"^tap.*\"}[1m]) + rate(node_network_transmit_bytes_total{device!~\"^tap.*\"}[1m])) / node_network_speed_bytes{device!~\"^tap.*\"} \u003e 0.8\n for: 1m\n labels:\n severity: warning\n annotations:\n summary: \"Host Network Interface Saturated (instance {{ $labels.instance }}).\"\n description: \"The network interface {{ $labels.interface }} on {{ $labels.instance }} is getting overloaded.\"\n - alert: HostSystemdServiceCrashed\n expr: node_systemd_unit_state{state=\"failed\"} == 1\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Host SystemD service crashed (instance {{ $labels.instance }}).\"\n description: \"SystemD service crashed.\"\n - alert: HostEdacCorrectableErrorsDetected\n expr: increase(node_edac_correctable_errors_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: info\n annotations:\n summary: \"Host EDAC Correctable Errors detected (instance {{ $labels.instance }}).\"\n description: '{{ $labels.instance }} has had {{ printf \"%.0f\" $value }} correctable memory errors reported by EDAC in the last 5 minutes.'\n - alert: HostEdacUncorrectableErrorsDetected\n expr: node_edac_uncorrectable_errors_total \u003e 0\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Host EDAC Uncorrectable Errors detected (instance {{ $labels.instance }}).\"\n description: '{{ $labels.instance }} has had {{ printf \"%.0f\" $value }} uncorrectable memory errors reported by EDAC in the last 5 minutes.'\n- name: \"Min.io\"\n rules:\n - alert: MinioDiskOffline\n expr: minio_offline_disks \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Minio disk offline (instance {{ $labels.instance }})\"\n description: \"Minio disk is offline.\"\n - alert: MinioStorageSpaceExhausted\n expr: minio_disk_storage_free_bytes / 1024 / 1024 / 1024 \u003c 10\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Minio storage space exhausted (instance {{ $labels.instance }}).\"\n description: \"Minio storage space is low (\u003c 10 GB).\"\n- name: \"Prometheus\"\n rules:\n - alert: PrometheusConfigurationReloadFailure\n expr: prometheus_config_last_reload_successful != 1\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Prometheus configuration reload failure (instance {{ $labels.instance }}).\"\n description: \"Prometheus configuration reload error.\"\n - alert: PrometheusTooManyRestarts\n expr: changes(process_start_time_seconds{job=~\"prometheus|pushgateway|alertmanager\"}[15m]) \u003e 2\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Prometheus too many restarts (instance {{ $labels.instance }}).\"\n description: \"Prometheus has restarted more than twice in the last 15 minutes. It might be crashlooping.\"\n - alert: PrometheusAlertmanagerConfigurationReloadFailure\n expr: alertmanager_config_last_reload_successful != 1\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Prometheus AlertManager configuration reload failure (instance {{ $labels.instance }}).\"\n description: \"AlertManager configuration reload error.\"\n - alert: PrometheusRuleEvaluationFailures\n expr: increase(prometheus_rule_evaluation_failures_total[3m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus rule evaluation failures (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} rule evaluation failures, leading to potentially ignored alerts.\"\n - alert: PrometheusTargetScrapingSlow\n expr: prometheus_target_interval_length_seconds{quantile=\"0.9\"} \u003e 60\n for: 5m\n labels:\n severity: warning\n annotations:\n summary: \"Prometheus target scraping slow (instance {{ $labels.instance }}).\"\n description: \"Prometheus is scraping exporters slowly.\"\n - alert: PrometheusTsdbCompactionsFailed\n expr: increase(prometheus_tsdb_compactions_failed_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus TSDB compactions failed (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} TSDB compactions failures.\"\n - alert: PrometheusTsdbHeadTruncationsFailed\n expr: increase(prometheus_tsdb_head_truncations_failed_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus TSDB head truncations failed (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} TSDB head truncation failures.\"\n - alert: PrometheusTsdbWalCorruptions\n expr: increase(prometheus_tsdb_wal_corruptions_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus TSDB WAL corruptions (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} TSDB WAL corruptions.\"\n - alert: PrometheusTsdbWalTruncationsFailed\n expr: increase(prometheus_tsdb_wal_truncations_failed_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus TSDB WAL truncations failed (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} TSDB WAL truncation failures.\"\nEOH\n }\n\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/prometheus.yml\"\n data = \u003c\u003cEOH\n---\nglobal:\n scrape_interval: 5s\n scrape_timeout: 5s\n evaluation_interval: 5s\n\nalerting:\n alertmanagers:\n - consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'alertmanager' ]\n\nrule_files:\n - 'alerts.yml'\n\nscrape_configs:\n\n - job_name: 'Nomad Cluster'\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'nomad-client', 'nomad' ]\n relabel_configs:\n - source_labels: [__meta_consul_tags]\n regex: '(.*)http(.*)'\n action: keep\n metrics_path: /v1/metrics\n params:\n format: [ 'prometheus' ]\n\n - job_name: 'Consul Cluster'\n static_configs:\n - targets: [ '10.30.51.22:8500' ]\n - targets: [ '10.30.51.24:8500' ]\n - targets: [ '10.30.51.25:8500' ]\n - targets: [ '10.30.51.26:8500' ]\n - targets: [ '10.30.51.28:8500' ]\n - targets: [ '10.30.51.29:8500' ]\n - targets: [ '10.30.51.30:8500' ]\n - targets: [ '10.30.51.39:8500' ]\n - targets: [ '10.30.51.40:8500' ]\n - targets: [ '10.30.51.50:8500' ]\n - targets: [ '10.30.51.51:8500' ]\n - targets: [ '10.30.51.65:8500' ]\n - targets: [ '10.30.51.66:8500' ]\n - targets: [ '10.30.51.67:8500' ]\n - targets: [ '10.30.51.68:8500' ]\n - targets: [ '10.30.51.70:8500' ]\n - targets: [ '10.30.51.71:8500' ]\n - targets: [ '10.32.8.14:8500' ]\n - targets: [ '10.32.8.15:8500' ]\n - targets: [ '10.32.8.16:8500' ]\n - targets: [ '10.32.8.17:8500' ]\n metrics_path: /v1/agent/metrics\n params:\n format: [ 'prometheus' ]\n\n - job_name: 'Blackbox Exporter (icmp)'\n static_configs:\n - targets: [ 'gerrit.fd.io' ]\n - targets: [ 'jenkins.fd.io' ]\n - targets: [ '10.32.8.17' ]\n params:\n module: [ 'icmp_v4' ]\n relabel_configs:\n - source_labels: [__address__]\n target_label: __param_target\n - source_labels: [__param_target]\n target_label: instance\n - target_label: __address__\n replacement: localhost:9115\n metrics_path: /probe\n\n - job_name: 'Blackbox Exporter (http)'\n static_configs:\n - targets: [ 'gerrit.fd.io' ]\n - targets: [ 'jenkins.fd.io' ]\n params:\n module: [ 'http_2xx' ]\n relabel_configs:\n - source_labels: [__address__]\n target_label: __param_target\n - source_labels: [__param_target]\n target_label: instance\n - target_label: __address__\n replacement: localhost:9115\n metrics_path: /probe\n\n - job_name: 'Jenkins Job Health Exporter'\n static_configs:\n - targets: [ '10.30.51.22:9186' ]\n metric_relabel_configs:\n - source_labels: [ __name__ ]\n regex: '^(vpp.*|csit.*)_(success|failure|total|unstable|reqtime_ms)$'\n action: replace\n replacement: '$1'\n target_label: id\n - source_labels: [ __name__ ]\n regex: '^(vpp.*|csit.*)_(success|failure|total|unstable|reqtime_ms)$'\n replacement: 'jenkins_job_$2'\n target_label: __name__\n\n - job_name: 'Node Exporter'\n static_configs:\n - targets: [ '10.30.51.22:9100' ]\n - targets: [ '10.30.51.24:9100' ]\n - targets: [ '10.30.51.25:9100' ]\n - targets: [ '10.30.51.26:9100' ]\n - targets: [ '10.30.51.28:9100' ]\n - targets: [ '10.30.51.29:9100' ]\n - targets: [ '10.30.51.30:9100' ]\n - targets: [ '10.30.51.39:9100' ]\n - targets: [ '10.30.51.40:9100' ]\n - targets: [ '10.30.51.50:9100' ]\n - targets: [ '10.30.51.51:9100' ]\n - targets: [ '10.30.51.65:9100' ]\n - targets: [ '10.30.51.66:9100' ]\n - targets: [ '10.30.51.67:9100' ]\n - targets: [ '10.30.51.68:9100' ]\n - targets: [ '10.30.51.70:9100' ]\n - targets: [ '10.30.51.71:9100' ]\n - targets: [ '10.32.8.14:9100' ]\n - targets: [ '10.32.8.15:9100' ]\n - targets: [ '10.32.8.16:9100' ]\n - targets: [ '10.32.8.17:9100' ]\n\n - job_name: 'Alertmanager'\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'alertmanager' ]\n\n - job_name: 'Grafana'\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'grafana' ]\n\n - job_name: 'Prometheus'\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'prometheus' ]\n\n - job_name: 'Minio'\n bearer_token: eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJleHAiOjQ3NjQ1ODEzMzcsImlzcyI6InByb21ldGhldXMiLCJzdWIiOiJtaW5pbyJ9.oeTw3EIaiFmlDikrHXWiWXMH2vxLfDLkfjEC7G2N3M_keH_xyA_l2ofLLNYtopa_3GCEZnxLQdPuFZrmgpkDWg\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'storage' ]\n metrics_path: /minio/prometheus/metrics\nEOH\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/service\n #\n service {\n name = \"prometheus\"\n port = \"prometheus\"\n tags = [ \"prometheus${NOMAD_ALLOC_INDEX}\" ]\n check {\n name = \"Prometheus Check Live\"\n type = \"http\"\n path = \"/-/healthy\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources\n #\n resources {\n cpu = 2000\n memory = 8192\n # The network stanza specifies the networking requirements for the task\n # group, including the network mode and port allocations. When scheduling\n # jobs in Nomad they are provisioned across your fleet of machines along\n # with other jobs and services. Because you don't know in advance what host\n # your job will be provisioned on, Nomad will provide your tasks with\n # network configuration when they start up.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/network\n #\n network {\n port \"prometheus\" {\n static = 9090\n }\n }\n }\n }\n }\n}",
- "template": "job \"${job_name}\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"${datacenters}\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 1\n\n health_check = \"checks\"\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n%{ if use_canary }\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 1\n\n # Specifies if the job should auto-promote to the canary version when all\n # canaries become healthy during a deployment. Defaults to false which means\n # canaries must be manually updated with the nomad deployment promote\n # command.\n auto_promote = true\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = true\n%{ endif }\n }\n\n # The reschedule stanza specifies the group's rescheduling strategy. If\n # specified at the job level, the configuration will apply to all groups\n # within the job. If the reschedule stanza is present on both the job and the\n # group, they are merged with the group stanza taking the highest precedence\n # and then the job.\n reschedule {\n delay = \"30s\"\n delay_function = \"constant\"\n unlimited = true\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group\n #\n group \"prod-group1-${service_name}\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = ${group_count}\n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The volume stanza allows the group to specify that it requires a given\n # volume from the cluster.\n #\n # For more information and examples on the \"volume\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/volume\n #\n %{ if use_host_volume }\n volume \"prod-volume1-${service_name}\" {\n type = \"host\"\n read_only = false\n source = \"${host_volume}\"\n }\n %{ endif }\n\n # The constraint allows restricting the set of eligible nodes. Constraints\n # may filter on attributes or client metadata.\n #\n # For more information and examples on the \"volume\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/constraint\n #\n constraint {\n attribute = \"$${attr.cpu.arch}\"\n operator = \"!=\"\n value = \"arm64\"\n }\n\n constraint {\n attribute = \"$${node.class}\"\n value = \"builder\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task\n #\n task \"prod-task1-${service_name}\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"exec\"\n\n %{ if use_host_volume }\n volume_mount {\n volume = \"prod-volume1-${service_name}\"\n destination = \"${data_dir}\"\n read_only = false\n }\n %{ endif }\n\n %{ if use_vault_provider }\n vault {\n policies = \"${vault_kv_policy_name}\"\n }\n %{ endif }\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n command = \"local/prometheus-${version}.linux-amd64/prometheus\"\n args = [\n \"--config.file=secrets/prometheus.yml\",\n \"--storage.tsdb.path=${data_dir}prometheus/\",\n \"--storage.tsdb.retention.time=7d\"\n ]\n }\n\n # The artifact stanza instructs Nomad to fetch and unpack a remote resource,\n # such as a file, tarball, or binary. Nomad downloads artifacts using the\n # popular go-getter library, which permits downloading artifacts from a\n # variety of locations using a URL as the input source.\n #\n # For more information and examples on the \"artifact\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/artifact\n #\n artifact {\n source = \"${url}\"\n }\n\n # The \"template\" stanza instructs Nomad to manage a template, such as\n # a configuration file or script. This template can optionally pull data\n # from Consul or Vault to populate runtime configuration data.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/template\n #\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/alerts.yml\"\n left_delimiter = \"{{{\"\n right_delimiter = \"}}}\"\n data = \u003c\u003cEOH\n---\ngroups:\n- name: \"Jenkins Job Health Exporter\"\n rules:\n - alert: JenkinsJobHealthExporterFailures\n expr: jenkins_job_failure{id=~\".*\"} \u003e jenkins_job_success{id=~\".*\"}\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Jenkins Job Health detected high failure rate on jenkins jobs.\"\n description: \"Job: {{ $labels.id }}\"\n - alert: JenkinsJobHealthExporterUnstable\n expr: jenkins_job_unstable{id=~\".*\"} \u003e jenkins_job_success{id=~\".*\"}\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Jenkins Job Health detected high unstable rate on jenkins jobs.\"\n description: \"Job: {{ $labels.id }}\"\n- name: \"Consul\"\n rules:\n - alert: ConsulServiceHealthcheckFailed\n expr: consul_catalog_service_node_healthy == 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Consul service healthcheck failed (instance {{ $labels.instance }}).\"\n description: \"Service: `{{ $labels.service_name }}` Healthcheck: `{{ $labels.service_id }}`.\"\n - alert: ConsulMissingMasterNode\n expr: consul_raft_peers \u003c 3\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Consul missing master node (instance {{ $labels.instance }}).\"\n description: \"Numbers of consul raft peers should be 3, in order to preserve quorum.\"\n - alert: ConsulAgentUnhealthy\n expr: consul_health_node_status{status=\"critical\"} == 1\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Consul agent unhealthy (instance {{ $labels.instance }}).\"\n description: \"A Consul agent is down.\"\n- name: \"Hosts\"\n rules:\n - alert: NodeDown\n expr: up == 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus target missing (instance {{ $labels.instance }}).\"\n description: \"A Prometheus target has disappeared. An exporter might be crashed.\"\n - alert: HostOutOfMemory\n expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 \u003c 10\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Host out of memory (instance {{ $labels.instance }}).\"\n description: \"Node memory is filling up (\u003c 10% left).\"\n - alert: HostOomKillDetected\n expr: increase(node_vmstat_oom_kill[1m]) \u003e 0\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Host OOM kill detected (instance {{ $labels.instance }}).\"\n description: \"OOM kill detected.\"\n - alert: HostMemoryUnderMemoryPressure\n expr: rate(node_vmstat_pgmajfault[1m]) \u003e 1000\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Host memory under memory pressure (instance {{ $labels.instance }}).\"\n description: \"The node is under heavy memory pressure. High rate of major page faults.\"\n - alert: HostOutOfDiskSpace\n expr: (node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes \u003c 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Host out of disk space (instance {{ $labels.instance }}).\"\n description: \"Disk is almost full (\u003c 10% left).\"\n - alert: HostRaidDiskFailure\n expr: node_md_disks{state=\"failed\"} \u003e 0\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Host RAID disk failure (instance {{ $labels.instance }}).\"\n description: \"At least one device in RAID array on {{ $labels.instance }} failed. Array {{ $labels.md_device }} needs attention and possibly a disk swap.\"\n - alert: HostConntrackLimit\n expr: node_nf_conntrack_entries / node_nf_conntrack_entries_limit \u003e 0.8\n for: 5m\n labels:\n severity: warning\n annotations:\n summary: \"Host conntrack limit (instance {{ $labels.instance }}).\"\n description: \"The number of conntrack is approching limit.\"\n - alert: HostNetworkInterfaceSaturated\n expr: (rate(node_network_receive_bytes_total{device!~\"^tap.*\"}[1m]) + rate(node_network_transmit_bytes_total{device!~\"^tap.*\"}[1m])) / node_network_speed_bytes{device!~\"^tap.*\"} \u003e 0.8\n for: 1m\n labels:\n severity: warning\n annotations:\n summary: \"Host Network Interface Saturated (instance {{ $labels.instance }}).\"\n description: \"The network interface {{ $labels.interface }} on {{ $labels.instance }} is getting overloaded.\"\n - alert: HostSystemdServiceCrashed\n expr: node_systemd_unit_state{state=\"failed\"} == 1\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Host SystemD service crashed (instance {{ $labels.instance }}).\"\n description: \"SystemD service crashed.\"\n - alert: HostEdacCorrectableErrorsDetected\n expr: increase(node_edac_correctable_errors_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: info\n annotations:\n summary: \"Host EDAC Correctable Errors detected (instance {{ $labels.instance }}).\"\n description: '{{ $labels.instance }} has had {{ printf \"%.0f\" $value }} correctable memory errors reported by EDAC in the last 5 minutes.'\n - alert: HostEdacUncorrectableErrorsDetected\n expr: node_edac_uncorrectable_errors_total \u003e 0\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Host EDAC Uncorrectable Errors detected (instance {{ $labels.instance }}).\"\n description: '{{ $labels.instance }} has had {{ printf \"%.0f\" $value }} uncorrectable memory errors reported by EDAC in the last 5 minutes.'\n- name: \"Min.io\"\n rules:\n - alert: MinioDiskOffline\n expr: minio_offline_disks \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Minio disk offline (instance {{ $labels.instance }})\"\n description: \"Minio disk is offline.\"\n - alert: MinioStorageSpaceExhausted\n expr: minio_disk_storage_free_bytes / 1024 / 1024 / 1024 \u003c 10\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Minio storage space exhausted (instance {{ $labels.instance }}).\"\n description: \"Minio storage space is low (\u003c 10 GB).\"\n- name: \"Prometheus\"\n rules:\n - alert: PrometheusConfigurationReloadFailure\n expr: prometheus_config_last_reload_successful != 1\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Prometheus configuration reload failure (instance {{ $labels.instance }}).\"\n description: \"Prometheus configuration reload error.\"\n - alert: PrometheusTooManyRestarts\n expr: changes(process_start_time_seconds{job=~\"prometheus|pushgateway|alertmanager\"}[15m]) \u003e 2\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Prometheus too many restarts (instance {{ $labels.instance }}).\"\n description: \"Prometheus has restarted more than twice in the last 15 minutes. It might be crashlooping.\"\n - alert: PrometheusAlertmanagerConfigurationReloadFailure\n expr: alertmanager_config_last_reload_successful != 1\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Prometheus AlertManager configuration reload failure (instance {{ $labels.instance }}).\"\n description: \"AlertManager configuration reload error.\"\n - alert: PrometheusRuleEvaluationFailures\n expr: increase(prometheus_rule_evaluation_failures_total[3m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus rule evaluation failures (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} rule evaluation failures, leading to potentially ignored alerts.\"\n - alert: PrometheusTargetScrapingSlow\n expr: prometheus_target_interval_length_seconds{quantile=\"0.9\"} \u003e 60\n for: 5m\n labels:\n severity: warning\n annotations:\n summary: \"Prometheus target scraping slow (instance {{ $labels.instance }}).\"\n description: \"Prometheus is scraping exporters slowly.\"\n - alert: PrometheusTsdbCompactionsFailed\n expr: increase(prometheus_tsdb_compactions_failed_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus TSDB compactions failed (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} TSDB compactions failures.\"\n - alert: PrometheusTsdbHeadTruncationsFailed\n expr: increase(prometheus_tsdb_head_truncations_failed_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus TSDB head truncations failed (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} TSDB head truncation failures.\"\n - alert: PrometheusTsdbWalCorruptions\n expr: increase(prometheus_tsdb_wal_corruptions_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus TSDB WAL corruptions (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} TSDB WAL corruptions.\"\n - alert: PrometheusTsdbWalTruncationsFailed\n expr: increase(prometheus_tsdb_wal_truncations_failed_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus TSDB WAL truncations failed (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} TSDB WAL truncation failures.\"\nEOH\n }\n\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/prometheus.yml\"\n data = \u003c\u003cEOH\n---\nglobal:\n scrape_interval: 5s\n scrape_timeout: 5s\n evaluation_interval: 5s\n\nalerting:\n alertmanagers:\n - consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'alertmanager' ]\n\nrule_files:\n - 'alerts.yml'\n\nscrape_configs:\n\n - job_name: 'Nomad Cluster'\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'nomad-client', 'nomad' ]\n relabel_configs:\n - source_labels: [__meta_consul_tags]\n regex: '(.*)http(.*)'\n action: keep\n metrics_path: /v1/metrics\n params:\n format: [ 'prometheus' ]\n\n - job_name: 'Consul Cluster'\n static_configs:\n - targets: [ '10.30.51.22:8500' ]\n - targets: [ '10.30.51.24:8500' ]\n - targets: [ '10.30.51.25:8500' ]\n - targets: [ '10.30.51.26:8500' ]\n - targets: [ '10.30.51.28:8500' ]\n - targets: [ '10.30.51.29:8500' ]\n - targets: [ '10.30.51.30:8500' ]\n - targets: [ '10.30.51.39:8500' ]\n - targets: [ '10.30.51.40:8500' ]\n - targets: [ '10.30.51.50:8500' ]\n - targets: [ '10.30.51.51:8500' ]\n - targets: [ '10.30.51.65:8500' ]\n - targets: [ '10.30.51.66:8500' ]\n - targets: [ '10.30.51.67:8500' ]\n - targets: [ '10.30.51.68:8500' ]\n - targets: [ '10.30.51.70:8500' ]\n - targets: [ '10.30.51.71:8500' ]\n - targets: [ '10.32.8.14:8500' ]\n - targets: [ '10.32.8.15:8500' ]\n - targets: [ '10.32.8.16:8500' ]\n - targets: [ '10.32.8.17:8500' ]\n metrics_path: /v1/agent/metrics\n params:\n format: [ 'prometheus' ]\n\n - job_name: 'Blackbox Exporter (icmp)'\n static_configs:\n - targets: [ 'gerrit.fd.io' ]\n - targets: [ 'jenkins.fd.io' ]\n - targets: [ '10.32.8.17' ]\n params:\n module: [ 'icmp_v4' ]\n relabel_configs:\n - source_labels: [__address__]\n target_label: __param_target\n - source_labels: [__param_target]\n target_label: instance\n - target_label: __address__\n replacement: localhost:9115\n metrics_path: /probe\n\n - job_name: 'Blackbox Exporter (http)'\n static_configs:\n - targets: [ 'gerrit.fd.io' ]\n - targets: [ 'jenkins.fd.io' ]\n params:\n module: [ 'http_2xx' ]\n relabel_configs:\n - source_labels: [__address__]\n target_label: __param_target\n - source_labels: [__param_target]\n target_label: instance\n - target_label: __address__\n replacement: localhost:9115\n metrics_path: /probe\n\n - job_name: 'Jenkins Job Health Exporter'\n static_configs:\n - targets: [ '10.30.51.22:9186' ]\n metric_relabel_configs:\n - source_labels: [ __name__ ]\n regex: '^(vpp.*|csit.*)_(success|failure|total|unstable|reqtime_ms)$'\n action: replace\n replacement: '$1'\n target_label: id\n - source_labels: [ __name__ ]\n regex: '^(vpp.*|csit.*)_(success|failure|total|unstable|reqtime_ms)$'\n replacement: 'jenkins_job_$2'\n target_label: __name__\n\n - job_name: 'Node Exporter'\n static_configs:\n - targets: [ '10.30.51.22:9100' ]\n - targets: [ '10.30.51.24:9100' ]\n - targets: [ '10.30.51.25:9100' ]\n - targets: [ '10.30.51.26:9100' ]\n - targets: [ '10.30.51.28:9100' ]\n - targets: [ '10.30.51.29:9100' ]\n - targets: [ '10.30.51.30:9100' ]\n - targets: [ '10.30.51.39:9100' ]\n - targets: [ '10.30.51.40:9100' ]\n - targets: [ '10.30.51.50:9100' ]\n - targets: [ '10.30.51.51:9100' ]\n - targets: [ '10.30.51.65:9100' ]\n - targets: [ '10.30.51.66:9100' ]\n - targets: [ '10.30.51.67:9100' ]\n - targets: [ '10.30.51.68:9100' ]\n - targets: [ '10.30.51.70:9100' ]\n - targets: [ '10.30.51.71:9100' ]\n - targets: [ '10.32.8.14:9100' ]\n - targets: [ '10.32.8.15:9100' ]\n - targets: [ '10.32.8.16:9100' ]\n - targets: [ '10.32.8.17:9100' ]\n\n - job_name: 'Alertmanager'\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'alertmanager' ]\n\n - job_name: 'Grafana'\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'grafana' ]\n\n - job_name: 'Prometheus'\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'prometheus' ]\n\n - job_name: 'Minio'\n bearer_token: eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJleHAiOjQ3NjQ1ODEzMzcsImlzcyI6InByb21ldGhldXMiLCJzdWIiOiJtaW5pbyJ9.oeTw3EIaiFmlDikrHXWiWXMH2vxLfDLkfjEC7G2N3M_keH_xyA_l2ofLLNYtopa_3GCEZnxLQdPuFZrmgpkDWg\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'storage' ]\n metrics_path: /minio/prometheus/metrics\nEOH\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/service\n #\n service {\n name = \"${service_name}\"\n port = \"${service_name}\"\n tags = [ \"${service_name}$${NOMAD_ALLOC_INDEX}\" ]\n check {\n name = \"Prometheus Check Live\"\n type = \"http\"\n path = \"/-/healthy\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources\n #\n resources {\n cpu = ${cpu}\n memory = ${mem}\n # The network stanza specifies the networking requirements for the task\n # group, including the network mode and port allocations. When scheduling\n # jobs in Nomad they are provisioned across your fleet of machines along\n # with other jobs and services. Because you don't know in advance what host\n # your job will be provisioned on, Nomad will provide your tasks with\n # network configuration when they start up.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/network\n #\n network {\n port \"${service_name}\" {\n static = ${port}\n }\n }\n }\n }\n }\n}",
- "vars": {
- "cpu": "2000",
- "data_dir": "/data/",
- "datacenters": "yul1",
- "group_count": "4",
- "host_volume": "prod-volume-data1-1",
- "job_name": "prod-prometheus",
- "mem": "8192",
- "port": "9090",
- "service_name": "prometheus",
- "url": "https://github.com/prometheus/prometheus/releases/download/v2.28.1/prometheus-2.28.1.linux-amd64.tar.gz",
- "use_canary": "true",
- "use_host_volume": "true",
- "use_vault_provider": "false",
- "version": "2.28.1"
- }
- },
- "sensitive_attributes": []
- }
- ]
- },
- {
- "module": "module.prometheus",
- "mode": "managed",
- "type": "nomad_job",
- "name": "nomad_job_prometheus",
- "provider": "provider[\"registry.terraform.io/hashicorp/nomad\"].yul1",
- "instances": [
- {
- "schema_version": 0,
- "attributes": {
- "allocation_ids": [
- "655f7aa1-199f-4241-9ff0-37620b2f3f10",
- "f80ed1f8-7783-6b17-0ed0-db3ab7786dbf",
- "0df91982-632b-a2dd-5149-d81cd93235f6",
- "277d7a3b-c31a-e4eb-a8fe-d97377fb4d9c",
- "6d8dc89f-788d-1229-5558-0e56398eb6f0",
- "b9f0d581-0093-d4e0-a502-9f15475b7b93"
- ],
- "datacenters": [
- "yul1"
- ],
- "deployment_id": "c5203cd1-b38b-44ca-21d0-4b1638ad382a",
- "deployment_status": "successful",
- "deregister_on_destroy": true,
- "deregister_on_id_change": true,
- "detach": false,
- "id": "prod-prometheus",
- "jobspec": "job \"prod-prometheus\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"yul1\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 1\n\n health_check = \"checks\"\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 1\n\n # Specifies if the job should auto-promote to the canary version when all\n # canaries become healthy during a deployment. Defaults to false which means\n # canaries must be manually updated with the nomad deployment promote\n # command.\n auto_promote = true\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = true\n\n }\n\n # The reschedule stanza specifies the group's rescheduling strategy. If\n # specified at the job level, the configuration will apply to all groups\n # within the job. If the reschedule stanza is present on both the job and the\n # group, they are merged with the group stanza taking the highest precedence\n # and then the job.\n reschedule {\n delay = \"30s\"\n delay_function = \"constant\"\n unlimited = true\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group\n #\n group \"prod-group1-prometheus\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 4\n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The volume stanza allows the group to specify that it requires a given\n # volume from the cluster.\n #\n # For more information and examples on the \"volume\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/volume\n #\n \n volume \"prod-volume1-prometheus\" {\n type = \"host\"\n read_only = false\n source = \"prod-volume-data1-1\"\n }\n \n\n # The constraint allows restricting the set of eligible nodes. Constraints\n # may filter on attributes or client metadata.\n #\n # For more information and examples on the \"volume\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/constraint\n #\n constraint {\n attribute = \"${attr.cpu.arch}\"\n operator = \"!=\"\n value = \"arm64\"\n }\n\n constraint {\n attribute = \"${node.class}\"\n value = \"builder\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task\n #\n task \"prod-task1-prometheus\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"exec\"\n\n \n volume_mount {\n volume = \"prod-volume1-prometheus\"\n destination = \"/data/\"\n read_only = false\n }\n \n\n \n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n command = \"local/prometheus-2.28.1.linux-amd64/prometheus\"\n args = [\n \"--config.file=secrets/prometheus.yml\",\n \"--storage.tsdb.path=/data/prometheus/\",\n \"--storage.tsdb.retention.time=7d\"\n ]\n }\n\n # The artifact stanza instructs Nomad to fetch and unpack a remote resource,\n # such as a file, tarball, or binary. Nomad downloads artifacts using the\n # popular go-getter library, which permits downloading artifacts from a\n # variety of locations using a URL as the input source.\n #\n # For more information and examples on the \"artifact\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/artifact\n #\n artifact {\n source = \"https://github.com/prometheus/prometheus/releases/download/v2.28.1/prometheus-2.28.1.linux-amd64.tar.gz\"\n }\n\n # The \"template\" stanza instructs Nomad to manage a template, such as\n # a configuration file or script. This template can optionally pull data\n # from Consul or Vault to populate runtime configuration data.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/template\n #\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/alerts.yml\"\n left_delimiter = \"{{{\"\n right_delimiter = \"}}}\"\n data = \u003c\u003cEOH\n---\ngroups:\n- name: \"Jenkins Job Health Exporter\"\n rules:\n - alert: JenkinsJobHealthExporterFailures\n expr: jenkins_job_failure{id=~\".*\"} \u003e jenkins_job_success{id=~\".*\"}\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Jenkins Job Health detected high failure rate on jenkins jobs.\"\n description: \"Job: {{ $labels.id }}\"\n - alert: JenkinsJobHealthExporterUnstable\n expr: jenkins_job_unstable{id=~\".*\"} \u003e jenkins_job_success{id=~\".*\"}\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Jenkins Job Health detected high unstable rate on jenkins jobs.\"\n description: \"Job: {{ $labels.id }}\"\n- name: \"Consul\"\n rules:\n - alert: ConsulServiceHealthcheckFailed\n expr: consul_catalog_service_node_healthy == 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Consul service healthcheck failed (instance {{ $labels.instance }}).\"\n description: \"Service: `{{ $labels.service_name }}` Healthcheck: `{{ $labels.service_id }}`.\"\n - alert: ConsulMissingMasterNode\n expr: consul_raft_peers \u003c 3\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Consul missing master node (instance {{ $labels.instance }}).\"\n description: \"Numbers of consul raft peers should be 3, in order to preserve quorum.\"\n - alert: ConsulAgentUnhealthy\n expr: consul_health_node_status{status=\"critical\"} == 1\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Consul agent unhealthy (instance {{ $labels.instance }}).\"\n description: \"A Consul agent is down.\"\n- name: \"Hosts\"\n rules:\n - alert: NodeDown\n expr: up == 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus target missing (instance {{ $labels.instance }}).\"\n description: \"A Prometheus target has disappeared. An exporter might be crashed.\"\n - alert: HostOutOfMemory\n expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 \u003c 10\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Host out of memory (instance {{ $labels.instance }}).\"\n description: \"Node memory is filling up (\u003c 10% left).\"\n - alert: HostOomKillDetected\n expr: increase(node_vmstat_oom_kill[1m]) \u003e 0\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Host OOM kill detected (instance {{ $labels.instance }}).\"\n description: \"OOM kill detected.\"\n - alert: HostMemoryUnderMemoryPressure\n expr: rate(node_vmstat_pgmajfault[1m]) \u003e 1000\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Host memory under memory pressure (instance {{ $labels.instance }}).\"\n description: \"The node is under heavy memory pressure. High rate of major page faults.\"\n - alert: HostOutOfDiskSpace\n expr: (node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes \u003c 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Host out of disk space (instance {{ $labels.instance }}).\"\n description: \"Disk is almost full (\u003c 10% left).\"\n - alert: HostRaidDiskFailure\n expr: node_md_disks{state=\"failed\"} \u003e 0\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Host RAID disk failure (instance {{ $labels.instance }}).\"\n description: \"At least one device in RAID array on {{ $labels.instance }} failed. Array {{ $labels.md_device }} needs attention and possibly a disk swap.\"\n - alert: HostConntrackLimit\n expr: node_nf_conntrack_entries / node_nf_conntrack_entries_limit \u003e 0.8\n for: 5m\n labels:\n severity: warning\n annotations:\n summary: \"Host conntrack limit (instance {{ $labels.instance }}).\"\n description: \"The number of conntrack is approching limit.\"\n - alert: HostNetworkInterfaceSaturated\n expr: (rate(node_network_receive_bytes_total{device!~\"^tap.*\"}[1m]) + rate(node_network_transmit_bytes_total{device!~\"^tap.*\"}[1m])) / node_network_speed_bytes{device!~\"^tap.*\"} \u003e 0.8\n for: 1m\n labels:\n severity: warning\n annotations:\n summary: \"Host Network Interface Saturated (instance {{ $labels.instance }}).\"\n description: \"The network interface {{ $labels.interface }} on {{ $labels.instance }} is getting overloaded.\"\n - alert: HostSystemdServiceCrashed\n expr: node_systemd_unit_state{state=\"failed\"} == 1\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Host SystemD service crashed (instance {{ $labels.instance }}).\"\n description: \"SystemD service crashed.\"\n - alert: HostEdacCorrectableErrorsDetected\n expr: increase(node_edac_correctable_errors_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: info\n annotations:\n summary: \"Host EDAC Correctable Errors detected (instance {{ $labels.instance }}).\"\n description: '{{ $labels.instance }} has had {{ printf \"%.0f\" $value }} correctable memory errors reported by EDAC in the last 5 minutes.'\n - alert: HostEdacUncorrectableErrorsDetected\n expr: node_edac_uncorrectable_errors_total \u003e 0\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Host EDAC Uncorrectable Errors detected (instance {{ $labels.instance }}).\"\n description: '{{ $labels.instance }} has had {{ printf \"%.0f\" $value }} uncorrectable memory errors reported by EDAC in the last 5 minutes.'\n- name: \"Min.io\"\n rules:\n - alert: MinioDiskOffline\n expr: minio_offline_disks \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Minio disk offline (instance {{ $labels.instance }})\"\n description: \"Minio disk is offline.\"\n - alert: MinioStorageSpaceExhausted\n expr: minio_disk_storage_free_bytes / 1024 / 1024 / 1024 \u003c 10\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Minio storage space exhausted (instance {{ $labels.instance }}).\"\n description: \"Minio storage space is low (\u003c 10 GB).\"\n- name: \"Prometheus\"\n rules:\n - alert: PrometheusConfigurationReloadFailure\n expr: prometheus_config_last_reload_successful != 1\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Prometheus configuration reload failure (instance {{ $labels.instance }}).\"\n description: \"Prometheus configuration reload error.\"\n - alert: PrometheusTooManyRestarts\n expr: changes(process_start_time_seconds{job=~\"prometheus|pushgateway|alertmanager\"}[15m]) \u003e 2\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Prometheus too many restarts (instance {{ $labels.instance }}).\"\n description: \"Prometheus has restarted more than twice in the last 15 minutes. It might be crashlooping.\"\n - alert: PrometheusAlertmanagerConfigurationReloadFailure\n expr: alertmanager_config_last_reload_successful != 1\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Prometheus AlertManager configuration reload failure (instance {{ $labels.instance }}).\"\n description: \"AlertManager configuration reload error.\"\n - alert: PrometheusRuleEvaluationFailures\n expr: increase(prometheus_rule_evaluation_failures_total[3m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus rule evaluation failures (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} rule evaluation failures, leading to potentially ignored alerts.\"\n - alert: PrometheusTargetScrapingSlow\n expr: prometheus_target_interval_length_seconds{quantile=\"0.9\"} \u003e 60\n for: 5m\n labels:\n severity: warning\n annotations:\n summary: \"Prometheus target scraping slow (instance {{ $labels.instance }}).\"\n description: \"Prometheus is scraping exporters slowly.\"\n - alert: PrometheusTsdbCompactionsFailed\n expr: increase(prometheus_tsdb_compactions_failed_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus TSDB compactions failed (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} TSDB compactions failures.\"\n - alert: PrometheusTsdbHeadTruncationsFailed\n expr: increase(prometheus_tsdb_head_truncations_failed_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus TSDB head truncations failed (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} TSDB head truncation failures.\"\n - alert: PrometheusTsdbWalCorruptions\n expr: increase(prometheus_tsdb_wal_corruptions_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus TSDB WAL corruptions (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} TSDB WAL corruptions.\"\n - alert: PrometheusTsdbWalTruncationsFailed\n expr: increase(prometheus_tsdb_wal_truncations_failed_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus TSDB WAL truncations failed (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} TSDB WAL truncation failures.\"\nEOH\n }\n\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/prometheus.yml\"\n data = \u003c\u003cEOH\n---\nglobal:\n scrape_interval: 5s\n scrape_timeout: 5s\n evaluation_interval: 5s\n\nalerting:\n alertmanagers:\n - consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'alertmanager' ]\n\nrule_files:\n - 'alerts.yml'\n\nscrape_configs:\n\n - job_name: 'Nomad Cluster'\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'nomad-client', 'nomad' ]\n relabel_configs:\n - source_labels: [__meta_consul_tags]\n regex: '(.*)http(.*)'\n action: keep\n metrics_path: /v1/metrics\n params:\n format: [ 'prometheus' ]\n\n - job_name: 'Consul Cluster'\n static_configs:\n - targets: [ '10.30.51.22:8500' ]\n - targets: [ '10.30.51.24:8500' ]\n - targets: [ '10.30.51.25:8500' ]\n - targets: [ '10.30.51.26:8500' ]\n - targets: [ '10.30.51.28:8500' ]\n - targets: [ '10.30.51.29:8500' ]\n - targets: [ '10.30.51.30:8500' ]\n - targets: [ '10.30.51.39:8500' ]\n - targets: [ '10.30.51.40:8500' ]\n - targets: [ '10.30.51.50:8500' ]\n - targets: [ '10.30.51.51:8500' ]\n - targets: [ '10.30.51.65:8500' ]\n - targets: [ '10.30.51.66:8500' ]\n - targets: [ '10.30.51.67:8500' ]\n - targets: [ '10.30.51.68:8500' ]\n - targets: [ '10.30.51.70:8500' ]\n - targets: [ '10.30.51.71:8500' ]\n - targets: [ '10.32.8.14:8500' ]\n - targets: [ '10.32.8.15:8500' ]\n - targets: [ '10.32.8.16:8500' ]\n - targets: [ '10.32.8.17:8500' ]\n metrics_path: /v1/agent/metrics\n params:\n format: [ 'prometheus' ]\n\n - job_name: 'Blackbox Exporter (icmp)'\n static_configs:\n - targets: [ 'gerrit.fd.io' ]\n - targets: [ 'jenkins.fd.io' ]\n - targets: [ '10.32.8.17' ]\n params:\n module: [ 'icmp_v4' ]\n relabel_configs:\n - source_labels: [__address__]\n target_label: __param_target\n - source_labels: [__param_target]\n target_label: instance\n - target_label: __address__\n replacement: localhost:9115\n metrics_path: /probe\n\n - job_name: 'Blackbox Exporter (http)'\n static_configs:\n - targets: [ 'gerrit.fd.io' ]\n - targets: [ 'jenkins.fd.io' ]\n params:\n module: [ 'http_2xx' ]\n relabel_configs:\n - source_labels: [__address__]\n target_label: __param_target\n - source_labels: [__param_target]\n target_label: instance\n - target_label: __address__\n replacement: localhost:9115\n metrics_path: /probe\n\n - job_name: 'Jenkins Job Health Exporter'\n static_configs:\n - targets: [ '10.30.51.22:9186' ]\n metric_relabel_configs:\n - source_labels: [ __name__ ]\n regex: '^(vpp.*|csit.*)_(success|failure|total|unstable|reqtime_ms)$'\n action: replace\n replacement: '$1'\n target_label: id\n - source_labels: [ __name__ ]\n regex: '^(vpp.*|csit.*)_(success|failure|total|unstable|reqtime_ms)$'\n replacement: 'jenkins_job_$2'\n target_label: __name__\n\n - job_name: 'Node Exporter'\n static_configs:\n - targets: [ '10.30.51.22:9100' ]\n - targets: [ '10.30.51.24:9100' ]\n - targets: [ '10.30.51.25:9100' ]\n - targets: [ '10.30.51.26:9100' ]\n - targets: [ '10.30.51.28:9100' ]\n - targets: [ '10.30.51.29:9100' ]\n - targets: [ '10.30.51.30:9100' ]\n - targets: [ '10.30.51.39:9100' ]\n - targets: [ '10.30.51.40:9100' ]\n - targets: [ '10.30.51.50:9100' ]\n - targets: [ '10.30.51.51:9100' ]\n - targets: [ '10.30.51.65:9100' ]\n - targets: [ '10.30.51.66:9100' ]\n - targets: [ '10.30.51.67:9100' ]\n - targets: [ '10.30.51.68:9100' ]\n - targets: [ '10.30.51.70:9100' ]\n - targets: [ '10.30.51.71:9100' ]\n - targets: [ '10.32.8.14:9100' ]\n - targets: [ '10.32.8.15:9100' ]\n - targets: [ '10.32.8.16:9100' ]\n - targets: [ '10.32.8.17:9100' ]\n\n - job_name: 'Alertmanager'\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'alertmanager' ]\n\n - job_name: 'Grafana'\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'grafana' ]\n\n - job_name: 'Prometheus'\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'prometheus' ]\n\n - job_name: 'Minio'\n bearer_token: eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJleHAiOjQ3NjQ1ODEzMzcsImlzcyI6InByb21ldGhldXMiLCJzdWIiOiJtaW5pbyJ9.oeTw3EIaiFmlDikrHXWiWXMH2vxLfDLkfjEC7G2N3M_keH_xyA_l2ofLLNYtopa_3GCEZnxLQdPuFZrmgpkDWg\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'storage' ]\n metrics_path: /minio/prometheus/metrics\nEOH\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/service\n #\n service {\n name = \"prometheus\"\n port = \"prometheus\"\n tags = [ \"prometheus${NOMAD_ALLOC_INDEX}\" ]\n check {\n name = \"Prometheus Check Live\"\n type = \"http\"\n path = \"/-/healthy\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources\n #\n resources {\n cpu = 2000\n memory = 8192\n # The network stanza specifies the networking requirements for the task\n # group, including the network mode and port allocations. When scheduling\n # jobs in Nomad they are provisioned across your fleet of machines along\n # with other jobs and services. Because you don't know in advance what host\n # your job will be provisioned on, Nomad will provide your tasks with\n # network configuration when they start up.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/network\n #\n network {\n port \"prometheus\" {\n static = 9090\n }\n }\n }\n }\n }\n}",
- "json": null,
- "modify_index": "9138750",
- "name": "prod-prometheus",
- "namespace": "default",
- "policy_override": null,
- "purge_on_destroy": null,
- "region": "global",
- "task_groups": [
- {
- "count": 4,
- "meta": {},
- "name": "prod-group1-prometheus",
- "task": [
- {
- "driver": "exec",
- "meta": {},
- "name": "prod-task1-prometheus",
- "volume_mounts": [
- {
- "destination": "/data/",
- "read_only": false,
- "volume": "prod-volume1-prometheus"
- }
- ]
- }
- ],
- "volumes": [
- {
- "name": "prod-volume1-prometheus",
- "read_only": false,
- "source": "prod-volume-data1-1",
- "type": "host"
- }
- ]
- }
- ],
- "type": "service"
- },
- "sensitive_attributes": [],
- "private": "bnVsbA==",
- "dependencies": [
- "module.prometheus.data.template_file.nomad_job_prometheus"
- ]
- }
- ]
- },
- {
- "module": "module.vpp_device",
- "mode": "data",
- "type": "template_file",
- "name": "nomad_job_csit_shim",
- "provider": "provider[\"registry.terraform.io/hashicorp/template\"]",
- "instances": [
- {
- "schema_version": 0,
- "attributes": {
- "filename": null,
- "id": "bebf27b2f8eb532adad3cabb86953deed4d0e9970ab0524d7857b3ebeebb917d",
- "rendered": "job \"prod-device-csit-shim\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"yul1\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers.html\n #\n type = \"system\"\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group.html\n #\n group \"prod-group1-csit-shim-amd\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 1\n\n constraint {\n attribute = \"${node.class}\"\n value = \"csit\"\n }\n\n restart {\n interval = \"1m\"\n attempts = 3\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task.html\n #\n task \"prod-task1-csit-shim-amd\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"fdiotools/csit_shim-ubuntu2004:2021_03_04_142103_UTC-x86_64\"\n network_mode = \"host\"\n pid_mode = \"host\"\n volumes = [\n \"/var/run/docker.sock:/var/run/docker.sock\"\n ]\n privileged = true\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources.html\n #\n resources {\n cpu = 1500\n memory = 4096\n network {\n port \"ssh\" {\n static = 6022\n }\n port \"ssh2\" {\n static = 6023\n }\n }\n }\n }\n }\n\n group \"prod-group1-csit-shim-arm\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 1\n\n constraint {\n attribute = \"${node.class}\"\n value = \"csitarm\"\n }\n\n restart {\n interval = \"1m\"\n attempts = 3\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task.html\n #\n task \"prod-task1-csit-shim-arm\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"fdiotools/csit_shim-ubuntu2004:2021_03_02_143938_UTC-aarch64\"\n network_mode = \"host\"\n pid_mode = \"host\"\n volumes = [\n \"/var/run/docker.sock:/var/run/docker.sock\"\n ]\n privileged = true\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources.html\n #\n resources {\n cpu = 1500\n memory = 4096\n network {\n port \"ssh\" {\n static = 6022\n }\n port \"ssh2\" {\n static = 6023\n }\n }\n }\n }\n }\n}",
- "template": "job \"${job_name}\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"${datacenters}\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers.html\n #\n type = \"system\"\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group.html\n #\n group \"prod-group1-csit-shim-amd\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = ${group_count}\n\n constraint {\n attribute = \"$${node.class}\"\n value = \"csit\"\n }\n\n restart {\n interval = \"1m\"\n attempts = 3\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task.html\n #\n task \"prod-task1-csit-shim-amd\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"${image_x86_64}\"\n network_mode = \"host\"\n pid_mode = \"host\"\n volumes = [\n \"/var/run/docker.sock:/var/run/docker.sock\"\n ]\n privileged = true\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources.html\n #\n resources {\n cpu = ${cpu}\n memory = ${mem}\n network {\n port \"ssh\" {\n static = 6022\n }\n port \"ssh2\" {\n static = 6023\n }\n }\n }\n }\n }\n\n group \"prod-group1-csit-shim-arm\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = ${group_count}\n\n constraint {\n attribute = \"$${node.class}\"\n value = \"csitarm\"\n }\n\n restart {\n interval = \"1m\"\n attempts = 3\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task.html\n #\n task \"prod-task1-csit-shim-arm\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"${image_aarch64}\"\n network_mode = \"host\"\n pid_mode = \"host\"\n volumes = [\n \"/var/run/docker.sock:/var/run/docker.sock\"\n ]\n privileged = true\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources.html\n #\n resources {\n cpu = ${cpu}\n memory = ${mem}\n network {\n port \"ssh\" {\n static = 6022\n }\n port \"ssh2\" {\n static = 6023\n }\n }\n }\n }\n }\n}",
- "vars": {
- "cpu": "1500",
- "datacenters": "yul1",
- "group_count": "1",
- "image_aarch64": "fdiotools/csit_shim-ubuntu2004:2021_03_02_143938_UTC-aarch64",
- "image_x86_64": "fdiotools/csit_shim-ubuntu2004:2021_03_04_142103_UTC-x86_64",
- "job_name": "prod-device-csit-shim",
- "mem": "4096"
- }
- },
- "sensitive_attributes": []
- }
- ]
- },
- {
- "module": "module.vpp_device",
- "mode": "managed",
- "type": "nomad_job",
- "name": "nomad_job_csit_shim",
- "provider": "provider[\"registry.terraform.io/hashicorp/nomad\"].yul1",
- "instances": [
- {
- "schema_version": 0,
- "attributes": {
- "allocation_ids": [],
- "datacenters": [
- "yul1"
- ],
- "deployment_id": "",
- "deployment_status": "",
- "deregister_on_destroy": true,
- "deregister_on_id_change": true,
- "detach": false,
- "id": "prod-device-csit-shim",
- "jobspec": "job \"prod-device-csit-shim\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"yul1\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers.html\n #\n type = \"system\"\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group.html\n #\n group \"prod-group1-csit-shim-amd\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 1\n\n constraint {\n attribute = \"${node.class}\"\n value = \"csit\"\n }\n\n restart {\n interval = \"1m\"\n attempts = 3\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task.html\n #\n task \"prod-task1-csit-shim-amd\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"fdiotools/csit_shim-ubuntu2004:2021_03_04_142103_UTC-x86_64\"\n network_mode = \"host\"\n pid_mode = \"host\"\n volumes = [\n \"/var/run/docker.sock:/var/run/docker.sock\"\n ]\n privileged = true\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources.html\n #\n resources {\n cpu = 1500\n memory = 4096\n network {\n port \"ssh\" {\n static = 6022\n }\n port \"ssh2\" {\n static = 6023\n }\n }\n }\n }\n }\n\n group \"prod-group1-csit-shim-arm\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 1\n\n constraint {\n attribute = \"${node.class}\"\n value = \"csitarm\"\n }\n\n restart {\n interval = \"1m\"\n attempts = 3\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task.html\n #\n task \"prod-task1-csit-shim-arm\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"fdiotools/csit_shim-ubuntu2004:2021_03_02_143938_UTC-aarch64\"\n network_mode = \"host\"\n pid_mode = \"host\"\n volumes = [\n \"/var/run/docker.sock:/var/run/docker.sock\"\n ]\n privileged = true\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources.html\n #\n resources {\n cpu = 1500\n memory = 4096\n network {\n port \"ssh\" {\n static = 6022\n }\n port \"ssh2\" {\n static = 6023\n }\n }\n }\n }\n }\n}",
- "json": null,
- "modify_index": "7575030",
- "name": "prod-device-csit-shim",
- "namespace": "default",
- "policy_override": null,
- "purge_on_destroy": null,
- "region": "global",
- "task_groups": [
- {
- "count": 1,
- "meta": {},
- "name": "prod-group1-csit-shim-amd",
- "task": [
- {
- "driver": "docker",
- "meta": {},
- "name": "prod-task1-csit-shim-amd",
- "volume_mounts": []
- }
- ],
- "volumes": []
- },
- {
- "count": 1,
- "meta": {},
- "name": "prod-group1-csit-shim-arm",
- "task": [
- {
- "driver": "docker",
- "meta": {},
- "name": "prod-task1-csit-shim-arm",
- "volume_mounts": []
- }
- ],
- "volumes": []
- }
- ],
- "type": "system"
- },
- "sensitive_attributes": [],
- "private": "bnVsbA==",
- "dependencies": [
- "module.vpp_device.data.template_file.nomad_job_csit_shim"
- ]
- }
- ]
- }
- ]
-}
diff --git a/fdio.infra.terraform/1n_nmd/terraform.tfstate.backup b/fdio.infra.terraform/1n_nmd/terraform.tfstate.backup
deleted file mode 100644
index 5397697870..0000000000
--- a/fdio.infra.terraform/1n_nmd/terraform.tfstate.backup
+++ /dev/null
@@ -1,635 +0,0 @@
-{
- "version": 4,
- "terraform_version": "1.0.4",
- "serial": 1187,
- "lineage": "e4e7f30a-652d-7a31-e31c-5e3a3388c9b9",
- "outputs": {},
- "resources": [
- {
- "module": "module.alertmanager",
- "mode": "data",
- "type": "template_file",
- "name": "nomad_job_alertmanager",
- "provider": "provider[\"registry.terraform.io/hashicorp/template\"]",
- "instances": [
- {
- "schema_version": 0,
- "attributes": {
- "filename": null,
- "id": "7e96dc4e316a25b07430e5ffc2ecdb94ae41f06770ba8667ae4d5f77ef25c6f7",
- "rendered": "job \"prod-alertmanager\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"yul1\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 1\n\n health_check = \"checks\"\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 1\n\n # Specifies if the job should auto-promote to the canary version when all\n # canaries become healthy during a deployment. Defaults to false which means\n # canaries must be manually updated with the nomad deployment promote\n # command.\n auto_promote = true\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = true\n\n }\n\n # The reschedule stanza specifies the group's rescheduling strategy. If\n # specified at the job level, the configuration will apply to all groups\n # within the job. If the reschedule stanza is present on both the job and the\n # group, they are merged with the group stanza taking the highest precedence\n # and then the job.\n reschedule {\n delay = \"30s\"\n delay_function = \"constant\"\n unlimited = true\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group\n #\n group \"prod-group1-alertmanager\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 1\n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The constraint allows restricting the set of eligible nodes. Constraints\n # may filter on attributes or client metadata.\n #\n # For more information and examples on the \"volume\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/constraint\n #\n constraint {\n attribute = \"${attr.cpu.arch}\"\n operator = \"!=\"\n value = \"arm64\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task\n #\n task \"prod-task1-alertmanager\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"exec\"\n\n \n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n command = \"local/alertmanager-0.21.0.linux-amd64/alertmanager\"\n args = [\n \"--config.file=secrets/alertmanager.yml\"\n ]\n }\n\n # The artifact stanza instructs Nomad to fetch and unpack a remote resource,\n # such as a file, tarball, or binary. Nomad downloads artifacts using the\n # popular go-getter library, which permits downloading artifacts from a\n # variety of locations using a URL as the input source.\n #\n # For more information and examples on the \"artifact\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/artifact\n #\n artifact {\n source = \"https://github.com/prometheus/alertmanager/releases/download/v0.21.0/alertmanager-0.21.0.linux-amd64.tar.gz\"\n }\n\n # The \"template\" stanza instructs Nomad to manage a template, such as\n # a configuration file or script. This template can optionally pull data\n # from Consul or Vault to populate runtime configuration data.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/template\n #\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/alertmanager.yml\"\n left_delimiter = \"{{{\"\n right_delimiter = \"}}}\"\n data = \u003c\u003cEOH\n# The directory from which notification templates are read.\ntemplates:\n- '/etc/alertmanager/template/*.tmpl'\n\n#tls_config:\n# # CA certificate to validate the server certificate with.\n# ca_file: \u003cfilepath\u003e ]\n#\n# # Certificate and key files for client cert authentication to the server.\n# cert_file: \u003cfilepath\u003e\n# key_file: \u003cfilepath\u003e\n#\n# # ServerName extension to indicate the name of the server.\n# # http://tools.ietf.org/html/rfc4366#section-3.1\n# server_name: \u003cstring\u003e\n#\n# # Disable validation of the server certificate.\n# insecure_skip_verify: true\n\n# The root route on which each incoming alert enters.\nroute:\n receiver: 'default-slack-receiver'\n\n # The labels by which incoming alerts are grouped together. For example,\n # multiple alerts coming in for cluster=A and alertname=LatencyHigh would\n # be batched into a single group.\n #\n # To aggregate by all possible labels use '...' as the sole label name.\n # This effectively disables aggregation entirely, passing through all\n # alerts as-is. This is unlikely to be what you want, unless you have\n # a very low alert volume or your upstream notification system performs\n # its own grouping. Example: group_by: [...]\n group_by: ['alertname']\n\n # When a new group of alerts is created by an incoming alert, wait at\n # least 'group_wait' to send the initial notification.\n # This way ensures that you get multiple alerts for the same group that start\n # firing shortly after another are batched together on the first\n # notification.\n group_wait: 30s\n\n # When the first notification was sent, wait 'group_interval' to send a batch\n # of new alerts that started firing for that group.\n group_interval: 5m\n\n # If an alert has successfully been sent, wait 'repeat_interval' to\n # resend them.\n repeat_interval: 3h\n\n # All the above attributes are inherited by all child routes and can\n # overwritten on each.\n # The child route trees.\n routes:\n - match_re:\n alertname: JenkinsJob.*\n receiver: jenkins-slack-receiver\n routes:\n - match:\n severity: critical\n receiver: 'jenkins-slack-receiver'\n\n - match_re:\n service: .*\n receiver: default-slack-receiver\n routes:\n - match:\n severity: critical\n receiver: 'default-slack-receiver'\n\n# Inhibition rules allow to mute a set of alerts given that another alert is\n# firing.\n# We use this to mute any warning-level notifications if the same alert is\n# already critical.\ninhibit_rules:\n- source_match:\n severity: 'critical'\n target_match:\n severity: 'warning'\n equal: ['alertname', 'instance']\n\nreceivers:\n- name: 'jenkins-slack-receiver'\n slack_configs:\n - api_url: 'TE07RD1V1/B01U1NV9HV3/hKZXJJ74g2JcISq4K3QC1eG9'\n channel: '#fdio-jobs-monitoring'\n send_resolved: true\n icon_url: https://avatars3.githubusercontent.com/u/3380462\n title: |-\n [{{ .Status | toUpper }}{{ if eq .Status \"firing\" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .CommonLabels.alertname }} for {{ .CommonLabels.job }}\n {{- if gt (len .CommonLabels) (len .GroupLabels) -}}\n {{\" \"}}(\n {{- with .CommonLabels.Remove .GroupLabels.Names }}\n {{- range $index, $label := .SortedPairs -}}\n {{ if $index }}, {{ end }}\n {{- $label.Name }}=\"{{ $label.Value -}}\"\n {{- end }}\n {{- end -}}\n )\n {{- end }}\n text: \u003e-\n {{ range .Alerts -}}\n *Alert:* {{ .Annotations.summary }}{{ if .Labels.severity }} - `{{ .Labels.severity }}`{{ end }}\n\n *Description:* {{ .Annotations.description }}\n\n *Details:*\n {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`\n {{ end }}\n {{ end }}\n\n- name: 'default-slack-receiver'\n slack_configs:\n - api_url: 'TE07RD1V1/B01UUK23B6C/hZTcCu42FUv8d6rtirHtcYIi'\n channel: '#fdio-infra-monitoring'\n send_resolved: true\n icon_url: https://avatars3.githubusercontent.com/u/3380462\n title: |-\n [{{ .Status | toUpper }}{{ if eq .Status \"firing\" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .CommonLabels.alertname }} for {{ .CommonLabels.job }}\n {{- if gt (len .CommonLabels) (len .GroupLabels) -}}\n {{\" \"}}(\n {{- with .CommonLabels.Remove .GroupLabels.Names }}\n {{- range $index, $label := .SortedPairs -}}\n {{ if $index }}, {{ end }}\n {{- $label.Name }}=\"{{ $label.Value -}}\"\n {{- end }}\n {{- end -}}\n )\n {{- end }}\n text: \u003e-\n {{ range .Alerts -}}\n *Alert:* {{ .Annotations.summary }}{{ if .Labels.severity }} - `{{ .Labels.severity }}`{{ end }}\n\n *Description:* {{ .Annotations.description }}\n\n *Details:*\n {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`\n {{ end }}\n {{ end }}\nEOH\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/service\n #\n service {\n name = \"alertmanager\"\n port = \"alertmanager\"\n tags = [ \"alertmanager${NOMAD_ALLOC_INDEX}\" ]\n check {\n name = \"Alertmanager Check Live\"\n type = \"http\"\n path = \"/-/healthy\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources\n #\n resources {\n cpu = 1000\n memory = 1024\n # The network stanza specifies the networking requirements for the task\n # group, including the network mode and port allocations. When scheduling\n # jobs in Nomad they are provisioned across your fleet of machines along\n # with other jobs and services. Because you don't know in advance what host\n # your job will be provisioned on, Nomad will provide your tasks with\n # network configuration when they start up.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/network\n #\n network {\n port \"alertmanager\" {\n static = 9093\n }\n }\n }\n }\n }\n}",
- "template": "job \"${job_name}\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"${datacenters}\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 1\n\n health_check = \"checks\"\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n%{ if use_canary }\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 1\n\n # Specifies if the job should auto-promote to the canary version when all\n # canaries become healthy during a deployment. Defaults to false which means\n # canaries must be manually updated with the nomad deployment promote\n # command.\n auto_promote = true\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = true\n%{ endif }\n }\n\n # The reschedule stanza specifies the group's rescheduling strategy. If\n # specified at the job level, the configuration will apply to all groups\n # within the job. If the reschedule stanza is present on both the job and the\n # group, they are merged with the group stanza taking the highest precedence\n # and then the job.\n reschedule {\n delay = \"30s\"\n delay_function = \"constant\"\n unlimited = true\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group\n #\n group \"prod-group1-${service_name}\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = ${group_count}\n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The constraint allows restricting the set of eligible nodes. Constraints\n # may filter on attributes or client metadata.\n #\n # For more information and examples on the \"volume\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/constraint\n #\n constraint {\n attribute = \"$${attr.cpu.arch}\"\n operator = \"!=\"\n value = \"arm64\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task\n #\n task \"prod-task1-${service_name}\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"exec\"\n\n %{ if use_vault_provider }\n vault {\n policies = \"${vault_kv_policy_name}\"\n }\n %{ endif }\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n command = \"local/alertmanager-${version}.linux-amd64/alertmanager\"\n args = [\n \"--config.file=secrets/alertmanager.yml\"\n ]\n }\n\n # The artifact stanza instructs Nomad to fetch and unpack a remote resource,\n # such as a file, tarball, or binary. Nomad downloads artifacts using the\n # popular go-getter library, which permits downloading artifacts from a\n # variety of locations using a URL as the input source.\n #\n # For more information and examples on the \"artifact\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/artifact\n #\n artifact {\n source = \"${url}\"\n }\n\n # The \"template\" stanza instructs Nomad to manage a template, such as\n # a configuration file or script. This template can optionally pull data\n # from Consul or Vault to populate runtime configuration data.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/template\n #\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/alertmanager.yml\"\n left_delimiter = \"{{{\"\n right_delimiter = \"}}}\"\n data = \u003c\u003cEOH\n# The directory from which notification templates are read.\ntemplates:\n- '/etc/alertmanager/template/*.tmpl'\n\n#tls_config:\n# # CA certificate to validate the server certificate with.\n# ca_file: \u003cfilepath\u003e ]\n#\n# # Certificate and key files for client cert authentication to the server.\n# cert_file: \u003cfilepath\u003e\n# key_file: \u003cfilepath\u003e\n#\n# # ServerName extension to indicate the name of the server.\n# # http://tools.ietf.org/html/rfc4366#section-3.1\n# server_name: \u003cstring\u003e\n#\n# # Disable validation of the server certificate.\n# insecure_skip_verify: true\n\n# The root route on which each incoming alert enters.\nroute:\n receiver: '${slack_default_receiver}'\n\n # The labels by which incoming alerts are grouped together. For example,\n # multiple alerts coming in for cluster=A and alertname=LatencyHigh would\n # be batched into a single group.\n #\n # To aggregate by all possible labels use '...' as the sole label name.\n # This effectively disables aggregation entirely, passing through all\n # alerts as-is. This is unlikely to be what you want, unless you have\n # a very low alert volume or your upstream notification system performs\n # its own grouping. Example: group_by: [...]\n group_by: ['alertname']\n\n # When a new group of alerts is created by an incoming alert, wait at\n # least 'group_wait' to send the initial notification.\n # This way ensures that you get multiple alerts for the same group that start\n # firing shortly after another are batched together on the first\n # notification.\n group_wait: 30s\n\n # When the first notification was sent, wait 'group_interval' to send a batch\n # of new alerts that started firing for that group.\n group_interval: 5m\n\n # If an alert has successfully been sent, wait 'repeat_interval' to\n # resend them.\n repeat_interval: 3h\n\n # All the above attributes are inherited by all child routes and can\n # overwritten on each.\n # The child route trees.\n routes:\n - match_re:\n alertname: JenkinsJob.*\n receiver: ${slack_jenkins_receiver}\n routes:\n - match:\n severity: critical\n receiver: '${slack_jenkins_receiver}'\n\n - match_re:\n service: .*\n receiver: ${slack_default_receiver}\n routes:\n - match:\n severity: critical\n receiver: '${slack_default_receiver}'\n\n# Inhibition rules allow to mute a set of alerts given that another alert is\n# firing.\n# We use this to mute any warning-level notifications if the same alert is\n# already critical.\ninhibit_rules:\n- source_match:\n severity: 'critical'\n target_match:\n severity: 'warning'\n equal: ['alertname', 'instance']\n\nreceivers:\n- name: '${slack_jenkins_receiver}'\n slack_configs:\n - api_url: '${slack_jenkins_api_key}'\n channel: '#${slack_jenkins_channel}'\n send_resolved: true\n icon_url: https://avatars3.githubusercontent.com/u/3380462\n title: |-\n [{{ .Status | toUpper }}{{ if eq .Status \"firing\" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .CommonLabels.alertname }} for {{ .CommonLabels.job }}\n {{- if gt (len .CommonLabels) (len .GroupLabels) -}}\n {{\" \"}}(\n {{- with .CommonLabels.Remove .GroupLabels.Names }}\n {{- range $index, $label := .SortedPairs -}}\n {{ if $index }}, {{ end }}\n {{- $label.Name }}=\"{{ $label.Value -}}\"\n {{- end }}\n {{- end -}}\n )\n {{- end }}\n text: \u003e-\n {{ range .Alerts -}}\n *Alert:* {{ .Annotations.summary }}{{ if .Labels.severity }} - `{{ .Labels.severity }}`{{ end }}\n\n *Description:* {{ .Annotations.description }}\n\n *Details:*\n {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`\n {{ end }}\n {{ end }}\n\n- name: '${slack_default_receiver}'\n slack_configs:\n - api_url: '${slack_default_api_key}'\n channel: '#${slack_default_channel}'\n send_resolved: true\n icon_url: https://avatars3.githubusercontent.com/u/3380462\n title: |-\n [{{ .Status | toUpper }}{{ if eq .Status \"firing\" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .CommonLabels.alertname }} for {{ .CommonLabels.job }}\n {{- if gt (len .CommonLabels) (len .GroupLabels) -}}\n {{\" \"}}(\n {{- with .CommonLabels.Remove .GroupLabels.Names }}\n {{- range $index, $label := .SortedPairs -}}\n {{ if $index }}, {{ end }}\n {{- $label.Name }}=\"{{ $label.Value -}}\"\n {{- end }}\n {{- end -}}\n )\n {{- end }}\n text: \u003e-\n {{ range .Alerts -}}\n *Alert:* {{ .Annotations.summary }}{{ if .Labels.severity }} - `{{ .Labels.severity }}`{{ end }}\n\n *Description:* {{ .Annotations.description }}\n\n *Details:*\n {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`\n {{ end }}\n {{ end }}\nEOH\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/service\n #\n service {\n name = \"${service_name}\"\n port = \"${service_name}\"\n tags = [ \"${service_name}$${NOMAD_ALLOC_INDEX}\" ]\n check {\n name = \"Alertmanager Check Live\"\n type = \"http\"\n path = \"/-/healthy\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources\n #\n resources {\n cpu = ${cpu}\n memory = ${mem}\n # The network stanza specifies the networking requirements for the task\n # group, including the network mode and port allocations. When scheduling\n # jobs in Nomad they are provisioned across your fleet of machines along\n # with other jobs and services. Because you don't know in advance what host\n # your job will be provisioned on, Nomad will provide your tasks with\n # network configuration when they start up.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/network\n #\n network {\n port \"${service_name}\" {\n static = ${port}\n }\n }\n }\n }\n }\n}",
- "vars": {
- "cpu": "1000",
- "datacenters": "yul1",
- "group_count": "1",
- "job_name": "prod-alertmanager",
- "mem": "1024",
- "port": "9093",
- "service_name": "alertmanager",
- "slack_default_api_key": "TE07RD1V1/B01UUK23B6C/hZTcCu42FUv8d6rtirHtcYIi",
- "slack_default_channel": "fdio-infra-monitoring",
- "slack_default_receiver": "default-slack-receiver",
- "slack_jenkins_api_key": "TE07RD1V1/B01U1NV9HV3/hKZXJJ74g2JcISq4K3QC1eG9",
- "slack_jenkins_channel": "fdio-jobs-monitoring",
- "slack_jenkins_receiver": "jenkins-slack-receiver",
- "url": "https://github.com/prometheus/alertmanager/releases/download/v0.21.0/alertmanager-0.21.0.linux-amd64.tar.gz",
- "use_canary": "true",
- "use_vault_provider": "false",
- "version": "0.21.0"
- }
- },
- "sensitive_attributes": []
- }
- ]
- },
- {
- "module": "module.alertmanager",
- "mode": "managed",
- "type": "nomad_job",
- "name": "nomad_job_alertmanager",
- "provider": "provider[\"registry.terraform.io/hashicorp/nomad\"].yul1",
- "instances": [
- {
- "schema_version": 0,
- "attributes": {
- "allocation_ids": [
- "778f4f9d-dea8-e5bd-c9cf-f7a7e2d3a24e"
- ],
- "datacenters": [
- "yul1"
- ],
- "deployment_id": "4dc972fd-cb2c-51d8-f730-08b5f7554ceb",
- "deployment_status": "successful",
- "deregister_on_destroy": true,
- "deregister_on_id_change": true,
- "detach": false,
- "id": "prod-alertmanager",
- "jobspec": "job \"prod-alertmanager\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"yul1\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 1\n\n health_check = \"checks\"\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 1\n\n # Specifies if the job should auto-promote to the canary version when all\n # canaries become healthy during a deployment. Defaults to false which means\n # canaries must be manually updated with the nomad deployment promote\n # command.\n auto_promote = true\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = true\n\n }\n\n # The reschedule stanza specifies the group's rescheduling strategy. If\n # specified at the job level, the configuration will apply to all groups\n # within the job. If the reschedule stanza is present on both the job and the\n # group, they are merged with the group stanza taking the highest precedence\n # and then the job.\n reschedule {\n delay = \"30s\"\n delay_function = \"constant\"\n unlimited = true\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group\n #\n group \"prod-group1-alertmanager\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 1\n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The constraint allows restricting the set of eligible nodes. Constraints\n # may filter on attributes or client metadata.\n #\n # For more information and examples on the \"volume\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/constraint\n #\n constraint {\n attribute = \"${attr.cpu.arch}\"\n operator = \"!=\"\n value = \"arm64\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task\n #\n task \"prod-task1-alertmanager\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"exec\"\n\n \n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n command = \"local/alertmanager-0.21.0.linux-amd64/alertmanager\"\n args = [\n \"--config.file=secrets/alertmanager.yml\"\n ]\n }\n\n # The artifact stanza instructs Nomad to fetch and unpack a remote resource,\n # such as a file, tarball, or binary. Nomad downloads artifacts using the\n # popular go-getter library, which permits downloading artifacts from a\n # variety of locations using a URL as the input source.\n #\n # For more information and examples on the \"artifact\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/artifact\n #\n artifact {\n source = \"https://github.com/prometheus/alertmanager/releases/download/v0.21.0/alertmanager-0.21.0.linux-amd64.tar.gz\"\n }\n\n # The \"template\" stanza instructs Nomad to manage a template, such as\n # a configuration file or script. This template can optionally pull data\n # from Consul or Vault to populate runtime configuration data.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/template\n #\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/alertmanager.yml\"\n left_delimiter = \"{{{\"\n right_delimiter = \"}}}\"\n data = \u003c\u003cEOH\n# The directory from which notification templates are read.\ntemplates:\n- '/etc/alertmanager/template/*.tmpl'\n\n#tls_config:\n# # CA certificate to validate the server certificate with.\n# ca_file: \u003cfilepath\u003e ]\n#\n# # Certificate and key files for client cert authentication to the server.\n# cert_file: \u003cfilepath\u003e\n# key_file: \u003cfilepath\u003e\n#\n# # ServerName extension to indicate the name of the server.\n# # http://tools.ietf.org/html/rfc4366#section-3.1\n# server_name: \u003cstring\u003e\n#\n# # Disable validation of the server certificate.\n# insecure_skip_verify: true\n\n# The root route on which each incoming alert enters.\nroute:\n receiver: 'default-slack-receiver'\n\n # The labels by which incoming alerts are grouped together. For example,\n # multiple alerts coming in for cluster=A and alertname=LatencyHigh would\n # be batched into a single group.\n #\n # To aggregate by all possible labels use '...' as the sole label name.\n # This effectively disables aggregation entirely, passing through all\n # alerts as-is. This is unlikely to be what you want, unless you have\n # a very low alert volume or your upstream notification system performs\n # its own grouping. Example: group_by: [...]\n group_by: ['alertname']\n\n # When a new group of alerts is created by an incoming alert, wait at\n # least 'group_wait' to send the initial notification.\n # This way ensures that you get multiple alerts for the same group that start\n # firing shortly after another are batched together on the first\n # notification.\n group_wait: 30s\n\n # When the first notification was sent, wait 'group_interval' to send a batch\n # of new alerts that started firing for that group.\n group_interval: 5m\n\n # If an alert has successfully been sent, wait 'repeat_interval' to\n # resend them.\n repeat_interval: 3h\n\n # All the above attributes are inherited by all child routes and can\n # overwritten on each.\n # The child route trees.\n routes:\n - match_re:\n alertname: JenkinsJob.*\n receiver: jenkins-slack-receiver\n routes:\n - match:\n severity: critical\n receiver: 'jenkins-slack-receiver'\n\n - match_re:\n service: .*\n receiver: default-slack-receiver\n routes:\n - match:\n severity: critical\n receiver: 'default-slack-receiver'\n\n# Inhibition rules allow to mute a set of alerts given that another alert is\n# firing.\n# We use this to mute any warning-level notifications if the same alert is\n# already critical.\ninhibit_rules:\n- source_match:\n severity: 'critical'\n target_match:\n severity: 'warning'\n equal: ['alertname', 'instance']\n\nreceivers:\n- name: 'jenkins-slack-receiver'\n slack_configs:\n - api_url: 'TE07RD1V1/B01U1NV9HV3/hKZXJJ74g2JcISq4K3QC1eG9'\n channel: '#fdio-jobs-monitoring'\n send_resolved: true\n icon_url: https://avatars3.githubusercontent.com/u/3380462\n title: |-\n [{{ .Status | toUpper }}{{ if eq .Status \"firing\" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .CommonLabels.alertname }} for {{ .CommonLabels.job }}\n {{- if gt (len .CommonLabels) (len .GroupLabels) -}}\n {{\" \"}}(\n {{- with .CommonLabels.Remove .GroupLabels.Names }}\n {{- range $index, $label := .SortedPairs -}}\n {{ if $index }}, {{ end }}\n {{- $label.Name }}=\"{{ $label.Value -}}\"\n {{- end }}\n {{- end -}}\n )\n {{- end }}\n text: \u003e-\n {{ range .Alerts -}}\n *Alert:* {{ .Annotations.summary }}{{ if .Labels.severity }} - `{{ .Labels.severity }}`{{ end }}\n\n *Description:* {{ .Annotations.description }}\n\n *Details:*\n {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`\n {{ end }}\n {{ end }}\n\n- name: 'default-slack-receiver'\n slack_configs:\n - api_url: 'TE07RD1V1/B01UUK23B6C/hZTcCu42FUv8d6rtirHtcYIi'\n channel: '#fdio-infra-monitoring'\n send_resolved: true\n icon_url: https://avatars3.githubusercontent.com/u/3380462\n title: |-\n [{{ .Status | toUpper }}{{ if eq .Status \"firing\" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .CommonLabels.alertname }} for {{ .CommonLabels.job }}\n {{- if gt (len .CommonLabels) (len .GroupLabels) -}}\n {{\" \"}}(\n {{- with .CommonLabels.Remove .GroupLabels.Names }}\n {{- range $index, $label := .SortedPairs -}}\n {{ if $index }}, {{ end }}\n {{- $label.Name }}=\"{{ $label.Value -}}\"\n {{- end }}\n {{- end -}}\n )\n {{- end }}\n text: \u003e-\n {{ range .Alerts -}}\n *Alert:* {{ .Annotations.summary }}{{ if .Labels.severity }} - `{{ .Labels.severity }}`{{ end }}\n\n *Description:* {{ .Annotations.description }}\n\n *Details:*\n {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`\n {{ end }}\n {{ end }}\nEOH\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/service\n #\n service {\n name = \"alertmanager\"\n port = \"alertmanager\"\n tags = [ \"alertmanager${NOMAD_ALLOC_INDEX}\" ]\n check {\n name = \"Alertmanager Check Live\"\n type = \"http\"\n path = \"/-/healthy\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources\n #\n resources {\n cpu = 1000\n memory = 1024\n # The network stanza specifies the networking requirements for the task\n # group, including the network mode and port allocations. When scheduling\n # jobs in Nomad they are provisioned across your fleet of machines along\n # with other jobs and services. Because you don't know in advance what host\n # your job will be provisioned on, Nomad will provide your tasks with\n # network configuration when they start up.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/network\n #\n network {\n port \"alertmanager\" {\n static = 9093\n }\n }\n }\n }\n }\n}",
- "json": null,
- "modify_index": "8259282",
- "name": "prod-alertmanager",
- "namespace": "default",
- "policy_override": null,
- "purge_on_destroy": null,
- "region": "global",
- "task_groups": [
- {
- "count": 1,
- "meta": {},
- "name": "prod-group1-alertmanager",
- "task": [
- {
- "driver": "exec",
- "meta": {},
- "name": "prod-task1-alertmanager",
- "volume_mounts": []
- }
- ],
- "volumes": []
- }
- ],
- "type": "service"
- },
- "sensitive_attributes": [],
- "private": "bnVsbA==",
- "dependencies": [
- "module.alertmanager.data.template_file.nomad_job_alertmanager"
- ]
- }
- ]
- },
- {
- "module": "module.grafana",
- "mode": "data",
- "type": "template_file",
- "name": "nomad_job_grafana",
- "provider": "provider[\"registry.terraform.io/hashicorp/template\"]",
- "instances": [
- {
- "schema_version": 0,
- "attributes": {
- "filename": null,
- "id": "cf7f3cd265a99a6f72f774f25564f1ec0e9e2c268a8f76112aed6cffe73bc4d4",
- "rendered": "job \"prod-grafana\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"yul1\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 1\n\n health_check = \"checks\"\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 1\n\n # Specifies if the job should auto-promote to the canary version when all\n # canaries become healthy during a deployment. Defaults to false which means\n # canaries must be manually updated with the nomad deployment promote\n # command.\n auto_promote = true\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = true\n\n }\n\n # The reschedule stanza specifies the group's rescheduling strategy. If\n # specified at the job level, the configuration will apply to all groups\n # within the job. If the reschedule stanza is present on both the job and the\n # group, they are merged with the group stanza taking the highest precedence\n # and then the job.\n reschedule {\n delay = \"30s\"\n delay_function = \"constant\"\n unlimited = true\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group\n #\n group \"prod-group1-grafana\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 1\n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The constraint allows restricting the set of eligible nodes. Constraints\n # may filter on attributes or client metadata.\n #\n # For more information and examples on the \"volume\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/constraint\n #\n constraint {\n attribute = \"${attr.cpu.arch}\"\n operator = \"!=\"\n value = \"arm64\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task\n #\n task \"prod-task1-grafana\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"grafana/grafana:7.3.7\"\n dns_servers = [ \"172.17.0.1\" ]\n volumes = [\n \"secrets/prometheus.yml:/etc/grafana/provisioning/datasources/prometheus.yml\",\n \"secrets/dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml\",\n \"secrets/grafana.ini:/etc/grafana/grafana.ini\",\n \"secrets/node_exporter.json:/etc/grafana/provisioning/dashboards/node_exporter.json\",\n \"secrets/docker_cadvisor.json:/etc/grafana/provisioning/dashboards/docker_cadvisor.json\",\n \"secrets/nomad.json:/etc/grafana/provisioning/dashboards/nomad.json\",\n \"secrets/consul.json:/etc/grafana/provisioning/dashboards/consul.json\",\n \"secrets/prometheus.json:/etc/grafana/provisioning/dashboards/prometheus.json\",\n \"secrets/blackbox_exporter_http.json:/etc/grafana/provisioning/dashboards/blackbox_exporter_http.json\",\n \"secrets/blackbox_exporter_icmp.json:/etc/grafana/provisioning/dashboards/blackbox_exporter_icmp.json\"\n ]\n }\n\n artifact {\n # Prometheus Node Exporter\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/node_exporter.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Docker cAdvisor\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/docker_cadvisor.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Nomad\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/nomad.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Consul\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/consul.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Prometheus\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/prometheus.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Prometheus Blackbox Exporter HTTP\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/blackbox_exporter_http.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Prometheus Blackbox Exporter ICMP\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/blackbox_exporter_icmp.json\"\n destination = \"secrets/\"\n }\n\n # The \"template\" stanza instructs Nomad to manage a template, such as\n # a configuration file or script. This template can optionally pull data\n # from Consul or Vault to populate runtime configuration data.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/template\n #\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/prometheus.yml\"\n data = \u003c\u003cEOH\napiVersion: 1\ndatasources:\n- name: Prometheus\n type: prometheus\n access: direct\n orgId: 1\n url: http://prometheus.service.consul:9090\n basicAuth: false\n isDefault: true\n version: 1\n editable: false\nEOH\n }\n\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/dashboards.yml\"\n data = \u003c\u003cEOH\napiVersion: 1\nproviders:\n- name: dashboards\n type: file\n disableDeletion: false\n updateIntervalSeconds: 10\n allowUiUpdates: false\n options:\n path: /etc/grafana/provisioning/dashboards\n foldersFromFilesStructure: true\nEOH\n }\n\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/grafana.ini\"\n data = \u003c\u003cEOH\napp_mode = production\n\n[metrics]\nenabled = true\n\n[server]\nprotocol = http\nhttp_port = 3000\nroot_url = http://grafana.service.consul:3000\nenable_gzip = true\n;cert_file =\n;cert_key =\n\n[security]\nadmin_user = grafanauser\nadmin_password = Grafana1234\nsecret_key = SW2YcwTIb9zpOOhoPsMm\n\n[users]\nallow_sign_up = false\nallow_org_create = false\nauto_assign_org = true\nauto_assign_org_role = Viewer\ndefault_theme = dark\n\n[auth.basic]\nenabled = true\n\n[auth]\ndisable_login_form = false\ndisable_signout_menu = false\n\n[auth.anonymous]\nenabled = false\n\n[log]\nmode = console\nlevel = info\n\n[log.console]\nlevel = info\nformat = console\nEOH\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/service\n #\n service {\n name = \"grafana\"\n port = \"grafana\"\n tags = [ \"grafana${NOMAD_ALLOC_INDEX}\" ]\n check {\n name = \"Grafana Check Live\"\n type = \"http\"\n protocol = \"http\"\n tls_skip_verify = true\n path = \"/api/health\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources\n #\n resources {\n cpu = 1000\n memory = 2048\n # The network stanza specifies the networking requirements for the task\n # group, including the network mode and port allocations. When scheduling\n # jobs in Nomad they are provisioned across your fleet of machines along\n # with other jobs and services. Because you don't know in advance what host\n # your job will be provisioned on, Nomad will provide your tasks with\n # network configuration when they start up.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/network\n #\n network {\n port \"grafana\" {\n static = 3000\n }\n }\n }\n }\n }\n}",
- "template": "job \"${job_name}\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"${datacenters}\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 1\n\n health_check = \"checks\"\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n%{ if use_canary }\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 1\n\n # Specifies if the job should auto-promote to the canary version when all\n # canaries become healthy during a deployment. Defaults to false which means\n # canaries must be manually updated with the nomad deployment promote\n # command.\n auto_promote = true\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = true\n%{ endif }\n }\n\n # The reschedule stanza specifies the group's rescheduling strategy. If\n # specified at the job level, the configuration will apply to all groups\n # within the job. If the reschedule stanza is present on both the job and the\n # group, they are merged with the group stanza taking the highest precedence\n # and then the job.\n reschedule {\n delay = \"30s\"\n delay_function = \"constant\"\n unlimited = true\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group\n #\n group \"prod-group1-${service_name}\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = ${group_count}\n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The constraint allows restricting the set of eligible nodes. Constraints\n # may filter on attributes or client metadata.\n #\n # For more information and examples on the \"volume\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/constraint\n #\n constraint {\n attribute = \"$${attr.cpu.arch}\"\n operator = \"!=\"\n value = \"arm64\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task\n #\n task \"prod-task1-${service_name}\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"${image}\"\n dns_servers = [ \"172.17.0.1\" ]\n volumes = [\n \"secrets/prometheus.yml:/etc/grafana/provisioning/datasources/prometheus.yml\",\n \"secrets/dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml\",\n \"secrets/grafana.ini:/etc/grafana/grafana.ini\",\n \"secrets/node_exporter.json:/etc/grafana/provisioning/dashboards/node_exporter.json\",\n \"secrets/docker_cadvisor.json:/etc/grafana/provisioning/dashboards/docker_cadvisor.json\",\n \"secrets/nomad.json:/etc/grafana/provisioning/dashboards/nomad.json\",\n \"secrets/consul.json:/etc/grafana/provisioning/dashboards/consul.json\",\n \"secrets/prometheus.json:/etc/grafana/provisioning/dashboards/prometheus.json\",\n \"secrets/blackbox_exporter_http.json:/etc/grafana/provisioning/dashboards/blackbox_exporter_http.json\",\n \"secrets/blackbox_exporter_icmp.json:/etc/grafana/provisioning/dashboards/blackbox_exporter_icmp.json\"\n ]\n }\n\n artifact {\n # Prometheus Node Exporter\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/node_exporter.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Docker cAdvisor\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/docker_cadvisor.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Nomad\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/nomad.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Consul\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/consul.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Prometheus\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/prometheus.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Prometheus Blackbox Exporter HTTP\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/blackbox_exporter_http.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Prometheus Blackbox Exporter ICMP\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/blackbox_exporter_icmp.json\"\n destination = \"secrets/\"\n }\n\n # The \"template\" stanza instructs Nomad to manage a template, such as\n # a configuration file or script. This template can optionally pull data\n # from Consul or Vault to populate runtime configuration data.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/template\n #\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/prometheus.yml\"\n data = \u003c\u003cEOH\napiVersion: 1\ndatasources:\n- name: Prometheus\n type: prometheus\n access: direct\n orgId: 1\n url: http://prometheus.service.consul:9090\n basicAuth: false\n isDefault: true\n version: 1\n editable: false\nEOH\n }\n\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/dashboards.yml\"\n data = \u003c\u003cEOH\napiVersion: 1\nproviders:\n- name: dashboards\n type: file\n disableDeletion: false\n updateIntervalSeconds: 10\n allowUiUpdates: false\n options:\n path: /etc/grafana/provisioning/dashboards\n foldersFromFilesStructure: true\nEOH\n }\n\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/grafana.ini\"\n data = \u003c\u003cEOH\napp_mode = production\n\n[metrics]\nenabled = true\n\n[server]\nprotocol = http\nhttp_port = ${port}\nroot_url = http://${service_name}.service.consul:${port}\nenable_gzip = true\n;cert_file =\n;cert_key =\n\n[security]\nadmin_user = grafanauser\nadmin_password = Grafana1234\nsecret_key = SW2YcwTIb9zpOOhoPsMm\n\n[users]\nallow_sign_up = false\nallow_org_create = false\nauto_assign_org = true\nauto_assign_org_role = Viewer\ndefault_theme = dark\n\n[auth.basic]\nenabled = true\n\n[auth]\ndisable_login_form = false\ndisable_signout_menu = false\n\n[auth.anonymous]\nenabled = false\n\n[log]\nmode = console\nlevel = info\n\n[log.console]\nlevel = info\nformat = console\nEOH\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/service\n #\n service {\n name = \"${service_name}\"\n port = \"${service_name}\"\n tags = [ \"${service_name}$${NOMAD_ALLOC_INDEX}\" ]\n check {\n name = \"Grafana Check Live\"\n type = \"http\"\n protocol = \"http\"\n tls_skip_verify = true\n path = \"/api/health\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources\n #\n resources {\n cpu = ${cpu}\n memory = ${mem}\n # The network stanza specifies the networking requirements for the task\n # group, including the network mode and port allocations. When scheduling\n # jobs in Nomad they are provisioned across your fleet of machines along\n # with other jobs and services. Because you don't know in advance what host\n # your job will be provisioned on, Nomad will provide your tasks with\n # network configuration when they start up.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/network\n #\n network {\n port \"${service_name}\" {\n static = ${port}\n }\n }\n }\n }\n }\n}",
- "vars": {
- "cpu": "1000",
- "datacenters": "yul1",
- "group_count": "1",
- "image": "grafana/grafana:7.3.7",
- "job_name": "prod-grafana",
- "mem": "2048",
- "port": "3000",
- "service_name": "grafana",
- "use_canary": "true",
- "use_vault_provider": "false"
- }
- },
- "sensitive_attributes": []
- }
- ]
- },
- {
- "module": "module.grafana",
- "mode": "managed",
- "type": "nomad_job",
- "name": "nomad_job_grafana",
- "provider": "provider[\"registry.terraform.io/hashicorp/nomad\"].yul1",
- "instances": [
- {
- "schema_version": 0,
- "attributes": {
- "allocation_ids": [
- "01d60c86-d59b-83ac-78c8-a903ac63816e"
- ],
- "datacenters": [
- "yul1"
- ],
- "deployment_id": "6d9fa6f6-7f84-4765-9488-7d92f32ec4df",
- "deployment_status": "successful",
- "deregister_on_destroy": true,
- "deregister_on_id_change": true,
- "detach": false,
- "id": "prod-grafana",
- "jobspec": "job \"prod-grafana\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"yul1\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 1\n\n health_check = \"checks\"\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 1\n\n # Specifies if the job should auto-promote to the canary version when all\n # canaries become healthy during a deployment. Defaults to false which means\n # canaries must be manually updated with the nomad deployment promote\n # command.\n auto_promote = true\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = true\n\n }\n\n # The reschedule stanza specifies the group's rescheduling strategy. If\n # specified at the job level, the configuration will apply to all groups\n # within the job. If the reschedule stanza is present on both the job and the\n # group, they are merged with the group stanza taking the highest precedence\n # and then the job.\n reschedule {\n delay = \"30s\"\n delay_function = \"constant\"\n unlimited = true\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group\n #\n group \"prod-group1-grafana\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 1\n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The constraint allows restricting the set of eligible nodes. Constraints\n # may filter on attributes or client metadata.\n #\n # For more information and examples on the \"volume\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/constraint\n #\n constraint {\n attribute = \"${attr.cpu.arch}\"\n operator = \"!=\"\n value = \"arm64\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task\n #\n task \"prod-task1-grafana\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"grafana/grafana:7.3.7\"\n dns_servers = [ \"172.17.0.1\" ]\n volumes = [\n \"secrets/prometheus.yml:/etc/grafana/provisioning/datasources/prometheus.yml\",\n \"secrets/dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml\",\n \"secrets/grafana.ini:/etc/grafana/grafana.ini\",\n \"secrets/node_exporter.json:/etc/grafana/provisioning/dashboards/node_exporter.json\",\n \"secrets/docker_cadvisor.json:/etc/grafana/provisioning/dashboards/docker_cadvisor.json\",\n \"secrets/nomad.json:/etc/grafana/provisioning/dashboards/nomad.json\",\n \"secrets/consul.json:/etc/grafana/provisioning/dashboards/consul.json\",\n \"secrets/prometheus.json:/etc/grafana/provisioning/dashboards/prometheus.json\",\n \"secrets/blackbox_exporter_http.json:/etc/grafana/provisioning/dashboards/blackbox_exporter_http.json\",\n \"secrets/blackbox_exporter_icmp.json:/etc/grafana/provisioning/dashboards/blackbox_exporter_icmp.json\"\n ]\n }\n\n artifact {\n # Prometheus Node Exporter\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/node_exporter.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Docker cAdvisor\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/docker_cadvisor.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Nomad\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/nomad.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Consul\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/consul.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Prometheus\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/prometheus.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Prometheus Blackbox Exporter HTTP\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/blackbox_exporter_http.json\"\n destination = \"secrets/\"\n }\n\n artifact {\n # Prometheus Blackbox Exporter ICMP\n source = \"https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/blackbox_exporter_icmp.json\"\n destination = \"secrets/\"\n }\n\n # The \"template\" stanza instructs Nomad to manage a template, such as\n # a configuration file or script. This template can optionally pull data\n # from Consul or Vault to populate runtime configuration data.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/template\n #\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/prometheus.yml\"\n data = \u003c\u003cEOH\napiVersion: 1\ndatasources:\n- name: Prometheus\n type: prometheus\n access: direct\n orgId: 1\n url: http://prometheus.service.consul:9090\n basicAuth: false\n isDefault: true\n version: 1\n editable: false\nEOH\n }\n\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/dashboards.yml\"\n data = \u003c\u003cEOH\napiVersion: 1\nproviders:\n- name: dashboards\n type: file\n disableDeletion: false\n updateIntervalSeconds: 10\n allowUiUpdates: false\n options:\n path: /etc/grafana/provisioning/dashboards\n foldersFromFilesStructure: true\nEOH\n }\n\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/grafana.ini\"\n data = \u003c\u003cEOH\napp_mode = production\n\n[metrics]\nenabled = true\n\n[server]\nprotocol = http\nhttp_port = 3000\nroot_url = http://grafana.service.consul:3000\nenable_gzip = true\n;cert_file =\n;cert_key =\n\n[security]\nadmin_user = grafanauser\nadmin_password = Grafana1234\nsecret_key = SW2YcwTIb9zpOOhoPsMm\n\n[users]\nallow_sign_up = false\nallow_org_create = false\nauto_assign_org = true\nauto_assign_org_role = Viewer\ndefault_theme = dark\n\n[auth.basic]\nenabled = true\n\n[auth]\ndisable_login_form = false\ndisable_signout_menu = false\n\n[auth.anonymous]\nenabled = false\n\n[log]\nmode = console\nlevel = info\n\n[log.console]\nlevel = info\nformat = console\nEOH\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/service\n #\n service {\n name = \"grafana\"\n port = \"grafana\"\n tags = [ \"grafana${NOMAD_ALLOC_INDEX}\" ]\n check {\n name = \"Grafana Check Live\"\n type = \"http\"\n protocol = \"http\"\n tls_skip_verify = true\n path = \"/api/health\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources\n #\n resources {\n cpu = 1000\n memory = 2048\n # The network stanza specifies the networking requirements for the task\n # group, including the network mode and port allocations. When scheduling\n # jobs in Nomad they are provisioned across your fleet of machines along\n # with other jobs and services. Because you don't know in advance what host\n # your job will be provisioned on, Nomad will provide your tasks with\n # network configuration when they start up.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/network\n #\n network {\n port \"grafana\" {\n static = 3000\n }\n }\n }\n }\n }\n}",
- "json": null,
- "modify_index": "7575032",
- "name": "prod-grafana",
- "namespace": "default",
- "policy_override": null,
- "purge_on_destroy": null,
- "region": "global",
- "task_groups": [
- {
- "count": 1,
- "meta": {},
- "name": "prod-group1-grafana",
- "task": [
- {
- "driver": "docker",
- "meta": {},
- "name": "prod-task1-grafana",
- "volume_mounts": []
- }
- ],
- "volumes": []
- }
- ],
- "type": "service"
- },
- "sensitive_attributes": [],
- "private": "bnVsbA==",
- "dependencies": [
- "module.grafana.data.template_file.nomad_job_grafana"
- ]
- }
- ]
- },
- {
- "module": "module.minio",
- "mode": "data",
- "type": "template_file",
- "name": "nomad_job_mc",
- "provider": "provider[\"registry.terraform.io/hashicorp/template\"]",
- "instances": [
- {
- "schema_version": 0,
- "attributes": {
- "filename": null,
- "id": "e9e956124e6fed4268bec199b9d7fd7aaff506e906b53621c6dc0e3116c40f52",
- "rendered": "job \"prod-mc\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"yul1\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers.html\n #\n type = \"batch\"\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group.html\n #\n group \"prod-group1-mc\" {\n task \"prod-task1-create-buckets\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n \n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"minio/mc:RELEASE.2021-07-27T02-40-15Z\"\n entrypoint = [\n \"/bin/sh\",\n \"-c\",\n \"mc config host add LOCALMINIO http://storage.service.consul:9000 $MINIO_ACCESS_KEY $MINIO_SECRET_KEY \u0026\u0026 mc mb -p LOCALMINIO/logs.fd.io LOCALMINIO/docs.fd.io ; mc policy set public LOCALMINIO/logs.fd.io mc policy set public LOCALMINIO/docs.fd.io mc ilm add --expiry-days '180' LOCALMINIO/logs.fd.io mc admin user add LOCALMINIO storage Storage1234 mc admin policy set LOCALMINIO writeonly user=storage\"\n ]\n dns_servers = [ \"${attr.unique.network.ip-address}\" ]\n privileged = false\n }\n\n # The env stanza configures a list of environment variables to populate\n # the task's environment before starting.\n env {\n \n MINIO_ACCESS_KEY = \"minio\"\n MINIO_SECRET_KEY = \"minio123\"\n \n \n }\n }\n }\n}\n",
- "template": "job \"${job_name}\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"${datacenters}\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers.html\n #\n type = \"batch\"\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group.html\n #\n group \"prod-group1-mc\" {\n task \"prod-task1-create-buckets\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n %{ if use_vault_provider }\n vault {\n policies = \"${vault_kv_policy_name}\"\n }\n %{ endif }\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"${image}\"\n entrypoint = [\n \"/bin/sh\",\n \"-c\",\n \"${command}\"\n ]\n dns_servers = [ \"$${attr.unique.network.ip-address}\" ]\n privileged = false\n }\n\n # The env stanza configures a list of environment variables to populate\n # the task's environment before starting.\n env {\n %{ if use_vault_provider }\n {{ with secret \"${vault_kv_path}\" }}\n MINIO_ACCESS_KEY = \"{{ .Data.data.${vault_kv_field_access_key} }}\"\n MINIO_SECRET_KEY = \"{{ .Data.data.${vault_kv_field_secret_key} }}\"\n {{ end }}\n %{ else }\n MINIO_ACCESS_KEY = \"${access_key}\"\n MINIO_SECRET_KEY = \"${secret_key}\"\n %{ endif }\n ${ envs }\n }\n }\n }\n}\n",
- "vars": {
- "access_key": "minio",
- "command": "mc config host add LOCALMINIO http://storage.service.consul:9000 $MINIO_ACCESS_KEY $MINIO_SECRET_KEY \u0026\u0026 mc mb -p LOCALMINIO/logs.fd.io LOCALMINIO/docs.fd.io ; mc policy set public LOCALMINIO/logs.fd.io mc policy set public LOCALMINIO/docs.fd.io mc ilm add --expiry-days '180' LOCALMINIO/logs.fd.io mc admin user add LOCALMINIO storage Storage1234 mc admin policy set LOCALMINIO writeonly user=storage",
- "datacenters": "yul1",
- "envs": "",
- "image": "minio/mc:RELEASE.2021-07-27T02-40-15Z",
- "job_name": "prod-mc",
- "minio_port": "9000",
- "minio_service_name": "storage",
- "secret_key": "minio123",
- "service_name": "mc",
- "use_vault_provider": "false"
- }
- },
- "sensitive_attributes": []
- }
- ]
- },
- {
- "module": "module.minio",
- "mode": "data",
- "type": "template_file",
- "name": "nomad_job_minio",
- "provider": "provider[\"registry.terraform.io/hashicorp/template\"]",
- "instances": [
- {
- "schema_version": 0,
- "attributes": {
- "filename": null,
- "id": "68243169c0df15ef6b56803101ed0c744a84545c34df366ee4036052c6292ef3",
- "rendered": "job \"prod-minio\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"yul1\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 1\n\n health_check = \"checks\"\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 1\n\n # Specifies if the job should auto-promote to the canary version when all\n # canaries become healthy during a deployment. Defaults to false which means\n # canaries must be manually updated with the nomad deployment promote\n # command.\n auto_promote = true\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = true\n\n }\n\n # All groups in this job should be scheduled on different hosts.\n constraint {\n operator = \"distinct_hosts\"\n value = \"true\"\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # https://www.nomadproject.io/docs/job-specification/group\n #\n group \"prod-group1-minio\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 4\n\n # https://www.nomadproject.io/docs/job-specification/volume\n \n volume \"prod-volume1-minio\" {\n type = \"host\"\n read_only = false\n source = \"prod-volume-data1-1\"\n }\n \n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # https://www.nomadproject.io/docs/job-specification/task.html\n #\n task \"prod-task1-minio\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n \n volume_mount {\n volume = \"prod-volume1-minio\"\n destination = \"/data/\"\n read_only = false\n }\n \n\n \n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"minio/minio:RELEASE.2021-07-27T02-40-15Z\"\n dns_servers = [ \"172.17.0.1\" ]\n network_mode = \"host\"\n command = \"server\"\n args = [ \"http://10.32.8.1{4...7}:9000/data/\" ]\n port_map {\n http = 9000\n }\n privileged = false\n }\n\n # The env stanza configures a list of environment variables to populate\n # the task's environment before starting.\n env {\n\n MINIO_ACCESS_KEY = \"minio\"\n MINIO_SECRET_KEY = \"minio123\"\n\n MINIO_BROWSER=\"off\"\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # https://www.nomadproject.io/docs/job-specification/service\n #\n service {\n name = \"storage\"\n port = \"http\"\n tags = [ \"storage${NOMAD_ALLOC_INDEX}\" ]\n check {\n name = \"Min.io Server HTTP Check Live\"\n type = \"http\"\n port = \"http\"\n protocol = \"http\"\n method = \"GET\"\n path = \"/minio/health/live\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n check {\n name = \"Min.io Server HTTP Check Ready\"\n type = \"http\"\n port = \"http\"\n protocol = \"http\"\n method = \"GET\"\n path = \"/minio/health/ready\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # https://www.nomadproject.io/docs/job-specification/resources\n #\n resources {\n cpu = 40000\n memory = 40000\n # The network stanza specifies the networking requirements for the task\n # group, including the network mode and port allocations. When scheduling\n # jobs in Nomad they are provisioned across your fleet of machines along\n # with other jobs and services. Because you don't know in advance what host\n # your job will be provisioned on, Nomad will provide your tasks with\n # network configuration when they start up.\n #\n # https://www.nomadproject.io/docs/job-specification/network\n #\n network {\n port \"http\" {\n static = 9000\n }\n }\n }\n }\n }\n}\n",
- "template": "job \"${job_name}\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"${datacenters}\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 1\n\n health_check = \"checks\"\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n%{ if use_canary }\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 1\n\n # Specifies if the job should auto-promote to the canary version when all\n # canaries become healthy during a deployment. Defaults to false which means\n # canaries must be manually updated with the nomad deployment promote\n # command.\n auto_promote = true\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = true\n%{ endif }\n }\n\n # All groups in this job should be scheduled on different hosts.\n constraint {\n operator = \"distinct_hosts\"\n value = \"true\"\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # https://www.nomadproject.io/docs/job-specification/group\n #\n group \"prod-group1-minio\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = ${group_count}\n\n # https://www.nomadproject.io/docs/job-specification/volume\n %{ if use_host_volume }\n volume \"prod-volume1-minio\" {\n type = \"host\"\n read_only = false\n source = \"${host_volume}\"\n }\n %{ endif }\n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # https://www.nomadproject.io/docs/job-specification/task.html\n #\n task \"prod-task1-minio\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n %{ if use_host_volume }\n volume_mount {\n volume = \"prod-volume1-minio\"\n destination = \"${data_dir}\"\n read_only = false\n }\n %{ endif }\n\n %{ if use_vault_provider }\n vault {\n policies = \"${vault_kv_policy_name}\"\n }\n %{ endif }\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"${image}\"\n dns_servers = [ \"172.17.0.1\" ]\n network_mode = \"host\"\n command = \"server\"\n args = [ \"${host}:${port}${data_dir}\" ]\n port_map {\n http = ${port}\n }\n privileged = false\n }\n\n # The env stanza configures a list of environment variables to populate\n # the task's environment before starting.\n env {\n%{ if use_vault_provider }\n{{ with secret \"${vault_kv_path}\" }}\n MINIO_ACCESS_KEY = \"{{ .Data.data.${vault_kv_field_access_key} }}\"\n MINIO_SECRET_KEY = \"{{ .Data.data.${vault_kv_field_secret_key} }}\"\n{{ end }}\n%{ else }\n MINIO_ACCESS_KEY = \"${access_key}\"\n MINIO_SECRET_KEY = \"${secret_key}\"\n%{ endif }\n ${ envs }\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # https://www.nomadproject.io/docs/job-specification/service\n #\n service {\n name = \"${service_name}\"\n port = \"http\"\n tags = [ \"storage$${NOMAD_ALLOC_INDEX}\" ]\n check {\n name = \"Min.io Server HTTP Check Live\"\n type = \"http\"\n port = \"http\"\n protocol = \"http\"\n method = \"GET\"\n path = \"/minio/health/live\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n check {\n name = \"Min.io Server HTTP Check Ready\"\n type = \"http\"\n port = \"http\"\n protocol = \"http\"\n method = \"GET\"\n path = \"/minio/health/ready\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # https://www.nomadproject.io/docs/job-specification/resources\n #\n resources {\n cpu = ${cpu}\n memory = ${memory}\n # The network stanza specifies the networking requirements for the task\n # group, including the network mode and port allocations. When scheduling\n # jobs in Nomad they are provisioned across your fleet of machines along\n # with other jobs and services. Because you don't know in advance what host\n # your job will be provisioned on, Nomad will provide your tasks with\n # network configuration when they start up.\n #\n # https://www.nomadproject.io/docs/job-specification/network\n #\n network {\n port \"http\" {\n static = ${port}\n }\n }\n }\n }\n }\n}\n",
- "vars": {
- "access_key": "minio",
- "cpu": "40000",
- "cpu_proxy": "200",
- "data_dir": "/data/",
- "datacenters": "yul1",
- "envs": "MINIO_BROWSER=\"off\"",
- "group_count": "4",
- "host": "http://10.32.8.1{4...7}",
- "host_volume": "prod-volume-data1-1",
- "image": "minio/minio:RELEASE.2021-07-27T02-40-15Z",
- "job_name": "prod-minio",
- "memory": "40000",
- "memory_proxy": "128",
- "port": "9000",
- "secret_key": "minio123",
- "service_name": "storage",
- "upstreams": "[]",
- "use_canary": "true",
- "use_host_volume": "true",
- "use_vault_provider": "false"
- }
- },
- "sensitive_attributes": []
- }
- ]
- },
- {
- "module": "module.minio",
- "mode": "managed",
- "type": "nomad_job",
- "name": "nomad_job_minio",
- "provider": "provider[\"registry.terraform.io/hashicorp/nomad\"].yul1",
- "instances": [
- {
- "schema_version": 0,
- "attributes": {
- "allocation_ids": [
- "844e34c1-13e6-3ddb-9402-e876230e5cdb",
- "44e1a0f0-5328-eafb-3beb-bfd53af2b618",
- "a6695ea0-2563-72ae-1791-a08ef933ff72",
- "51126acb-945c-2629-5787-76b85677ddd0"
- ],
- "datacenters": [
- "yul1"
- ],
- "deployment_id": "33d8013c-7c7c-6828-a204-58d9286b61ef",
- "deployment_status": "successful",
- "deregister_on_destroy": true,
- "deregister_on_id_change": true,
- "detach": false,
- "id": "prod-minio",
- "jobspec": "job \"prod-minio\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"yul1\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 1\n\n health_check = \"checks\"\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 1\n\n # Specifies if the job should auto-promote to the canary version when all\n # canaries become healthy during a deployment. Defaults to false which means\n # canaries must be manually updated with the nomad deployment promote\n # command.\n auto_promote = true\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = true\n\n }\n\n # All groups in this job should be scheduled on different hosts.\n constraint {\n operator = \"distinct_hosts\"\n value = \"true\"\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # https://www.nomadproject.io/docs/job-specification/group\n #\n group \"prod-group1-minio\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 4\n\n # https://www.nomadproject.io/docs/job-specification/volume\n \n volume \"prod-volume1-minio\" {\n type = \"host\"\n read_only = false\n source = \"prod-volume-data1-1\"\n }\n \n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # https://www.nomadproject.io/docs/job-specification/task.html\n #\n task \"prod-task1-minio\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n \n volume_mount {\n volume = \"prod-volume1-minio\"\n destination = \"/data/\"\n read_only = false\n }\n \n\n \n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"minio/minio:RELEASE.2021-07-27T02-40-15Z\"\n dns_servers = [ \"172.17.0.1\" ]\n network_mode = \"host\"\n command = \"server\"\n args = [ \"http://10.32.8.1{4...7}:9000/data/\" ]\n port_map {\n http = 9000\n }\n privileged = false\n }\n\n # The env stanza configures a list of environment variables to populate\n # the task's environment before starting.\n env {\n\n MINIO_ACCESS_KEY = \"minio\"\n MINIO_SECRET_KEY = \"minio123\"\n\n MINIO_BROWSER=\"off\"\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # https://www.nomadproject.io/docs/job-specification/service\n #\n service {\n name = \"storage\"\n port = \"http\"\n tags = [ \"storage${NOMAD_ALLOC_INDEX}\" ]\n check {\n name = \"Min.io Server HTTP Check Live\"\n type = \"http\"\n port = \"http\"\n protocol = \"http\"\n method = \"GET\"\n path = \"/minio/health/live\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n check {\n name = \"Min.io Server HTTP Check Ready\"\n type = \"http\"\n port = \"http\"\n protocol = \"http\"\n method = \"GET\"\n path = \"/minio/health/ready\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # https://www.nomadproject.io/docs/job-specification/resources\n #\n resources {\n cpu = 40000\n memory = 40000\n # The network stanza specifies the networking requirements for the task\n # group, including the network mode and port allocations. When scheduling\n # jobs in Nomad they are provisioned across your fleet of machines along\n # with other jobs and services. Because you don't know in advance what host\n # your job will be provisioned on, Nomad will provide your tasks with\n # network configuration when they start up.\n #\n # https://www.nomadproject.io/docs/job-specification/network\n #\n network {\n port \"http\" {\n static = 9000\n }\n }\n }\n }\n }\n}\n",
- "json": null,
- "modify_index": "8949013",
- "name": "prod-minio",
- "namespace": "default",
- "policy_override": null,
- "purge_on_destroy": null,
- "region": "global",
- "task_groups": [
- {
- "count": 4,
- "meta": {},
- "name": "prod-group1-minio",
- "task": [
- {
- "driver": "docker",
- "meta": {},
- "name": "prod-task1-minio",
- "volume_mounts": [
- {
- "destination": "/data/",
- "read_only": false,
- "volume": "prod-volume1-minio"
- }
- ]
- }
- ],
- "volumes": [
- {
- "name": "prod-volume1-minio",
- "read_only": false,
- "source": "prod-volume-data1-1",
- "type": "host"
- }
- ]
- }
- ],
- "type": "service"
- },
- "sensitive_attributes": [],
- "private": "bnVsbA==",
- "dependencies": [
- "module.minio.data.template_file.nomad_job_minio"
- ]
- }
- ]
- },
- {
- "module": "module.nginx",
- "mode": "data",
- "type": "template_file",
- "name": "nomad_job_nginx",
- "provider": "provider[\"registry.terraform.io/hashicorp/template\"]",
- "instances": [
- {
- "schema_version": 0,
- "attributes": {
- "filename": null,
- "id": "f6d48eb498752cd7c1c73a26a146961048a1b7f7d53602df55b413f27226e519",
- "rendered": "job \"prod-nginx\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"yul1\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers.html\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 0\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = false\n\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 0\n }\n\n # The reschedule stanza specifies the group's rescheduling strategy. If\n # specified at the job level, the configuration will apply to all groups\n # within the job. If the reschedule stanza is present on both the job and the\n # group, they are merged with the group stanza taking the highest precedence\n # and then the job.\n reschedule {\n delay = \"30s\"\n delay_function = \"constant\"\n unlimited = true\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group.html\n #\n group \"prod-group1-nginx\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 1\n\n # https://www.nomadproject.io/docs/job-specification/volume\n \n volume \"prod-volume1-nginx\" {\n type = \"host\"\n read_only = false\n source = \"prod-volume-data1-1\"\n }\n \n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task.html\n #\n task \"prod-task1-nginx\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"nginx:stable\"\n port_map {\n https = 443\n }\n privileged = false\n volumes = [\n \"/etc/ssl/certs/docs.nginx.service.consul.crt:/etc/ssl/certs/docs.nginx.service.consul.crt\",\n \"/etc/ssl/private/docs.nginx.service.consul.key:/etc/ssl/private/docs.nginx.service.consul.key\",\n \"/etc/ssl/certs/logs.nginx.service.consul.crt:/etc/ssl/certs/logs.nginx.service.consul.crt\",\n \"/etc/ssl/private/logs.nginx.service.consul.key:/etc/ssl/private/logs.nginx.service.consul.key\",\n \"custom/upstream.conf:/etc/nginx/conf.d/upstream.conf\",\n \"custom/logs.conf:/etc/nginx/conf.d/logs.conf\",\n \"custom/docs.conf:/etc/nginx/conf.d/docs.conf\"\n ]\n }\n\n # The \"template\" stanza instructs Nomad to manage a template, such as\n # a configuration file or script. This template can optionally pull data\n # from Consul or Vault to populate runtime configuration data.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/template.html\n #\n template {\n data = \u003c\u003cEOH\n upstream storage {\n {{ range service \"storage\" }}\n server {{ .Address }}:{{ .Port }};\n {{ end }}\n }\n EOH\n destination = \"custom/upstream.conf\"\n }\n template {\n data = \u003c\u003cEOH\n server {\n listen 443 ssl default_server;\n server_name logs.nginx.service.consul;\n keepalive_timeout 70;\n ssl_session_cache shared:SSL:10m;\n ssl_session_timeout 10m;\n ssl_protocols TLSv1.2;\n ssl_prefer_server_ciphers on;\n ssl_ciphers \"ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES256-SHA384:ECDHE-RSA-AES256-SHA384\";\n ssl_certificate /etc/ssl/certs/logs.nginx.service.consul.crt;\n ssl_certificate_key /etc/ssl/private/logs.nginx.service.consul.key;\n location / {\n chunked_transfer_encoding off;\n proxy_connect_timeout 300;\n proxy_http_version 1.1;\n proxy_set_header Host $host:$server_port;\n proxy_set_header Connection \"\";\n proxy_pass http://storage/logs.fd.io/;\n server_name_in_redirect off;\n }\n location ~ (.*html.gz)$ {\n add_header Content-Encoding gzip;\n add_header Content-Type text/html;\n chunked_transfer_encoding off;\n proxy_connect_timeout 300;\n proxy_http_version 1.1;\n proxy_set_header Host $host:$server_port;\n proxy_set_header Connection \"\";\n proxy_pass http://storage/logs.fd.io/$1;\n server_name_in_redirect off;\n }\n location ~ (.*txt.gz|.*log.gz)$ {\n add_header Content-Encoding gzip;\n add_header Content-Type text/plain;\n chunked_transfer_encoding off;\n proxy_connect_timeout 300;\n proxy_http_version 1.1;\n proxy_set_header Host $host:$server_port;\n proxy_set_header Connection \"\";\n proxy_pass http://storage/logs.fd.io/$1;\n server_name_in_redirect off;\n }\n location ~ (.*xml.gz)$ {\n add_header Content-Encoding gzip;\n add_header Content-Type application/xml;\n chunked_transfer_encoding off;\n proxy_connect_timeout 300;\n proxy_http_version 1.1;\n proxy_set_header Host $host:$server_port;\n proxy_set_header Connection \"\";\n proxy_pass http://storage/logs.fd.io/$1;\n server_name_in_redirect off;\n }\n }\n EOH\n destination = \"custom/logs.conf\"\n }\n template {\n data = \u003c\u003cEOH\n server {\n listen 443 ssl;\n server_name docs.nginx.service.consul;\n keepalive_timeout 70;\n ssl_session_cache shared:SSL:10m;\n ssl_session_timeout 10m;\n ssl_protocols TLSv1.2;\n ssl_prefer_server_ciphers on;\n ssl_ciphers \"ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES256-SHA384:ECDHE-RSA-AES256-SHA384\";\n ssl_certificate /etc/ssl/certs/docs.nginx.service.consul.crt;\n ssl_certificate_key /etc/ssl/private/docs.nginx.service.consul.key;\n location / {\n chunked_transfer_encoding off;\n proxy_connect_timeout 300;\n proxy_http_version 1.1;\n proxy_set_header Host $host:$server_port;\n proxy_set_header Connection \"\";\n proxy_pass http://storage/docs.fd.io/;\n server_name_in_redirect off;\n }\n }\n EOH\n destination = \"custom/docs.conf\"\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/service.html\n #\n service {\n name = \"nginx\"\n port = \"https\"\n tags = [ \"docs\", \"logs\" ]\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources.html\n #\n resources {\n cpu = 2000\n memory = 4096\n network {\n mode = \"bridge\"\n port \"https\" {\n static = 443\n }\n }\n }\n }\n }\n}",
- "template": "job \"${job_name}\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"${datacenters}\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers.html\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 0\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = false\n\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 0\n }\n\n # The reschedule stanza specifies the group's rescheduling strategy. If\n # specified at the job level, the configuration will apply to all groups\n # within the job. If the reschedule stanza is present on both the job and the\n # group, they are merged with the group stanza taking the highest precedence\n # and then the job.\n reschedule {\n delay = \"30s\"\n delay_function = \"constant\"\n unlimited = true\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group.html\n #\n group \"prod-group1-nginx\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 1\n\n # https://www.nomadproject.io/docs/job-specification/volume\n %{ if use_host_volume }\n volume \"prod-volume1-nginx\" {\n type = \"host\"\n read_only = false\n source = \"${host_volume}\"\n }\n %{ endif }\n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task.html\n #\n task \"prod-task1-nginx\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"nginx:stable\"\n port_map {\n https = 443\n }\n privileged = false\n volumes = [\n \"/etc/ssl/certs/docs.nginx.service.consul.crt:/etc/ssl/certs/docs.nginx.service.consul.crt\",\n \"/etc/ssl/private/docs.nginx.service.consul.key:/etc/ssl/private/docs.nginx.service.consul.key\",\n \"/etc/ssl/certs/logs.nginx.service.consul.crt:/etc/ssl/certs/logs.nginx.service.consul.crt\",\n \"/etc/ssl/private/logs.nginx.service.consul.key:/etc/ssl/private/logs.nginx.service.consul.key\",\n \"custom/upstream.conf:/etc/nginx/conf.d/upstream.conf\",\n \"custom/logs.conf:/etc/nginx/conf.d/logs.conf\",\n \"custom/docs.conf:/etc/nginx/conf.d/docs.conf\"\n ]\n }\n\n # The \"template\" stanza instructs Nomad to manage a template, such as\n # a configuration file or script. This template can optionally pull data\n # from Consul or Vault to populate runtime configuration data.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/template.html\n #\n template {\n data = \u003c\u003cEOH\n upstream storage {\n {{ range service \"storage\" }}\n server {{ .Address }}:{{ .Port }};\n {{ end }}\n }\n EOH\n destination = \"custom/upstream.conf\"\n }\n template {\n data = \u003c\u003cEOH\n server {\n listen 443 ssl default_server;\n server_name logs.nginx.service.consul;\n keepalive_timeout 70;\n ssl_session_cache shared:SSL:10m;\n ssl_session_timeout 10m;\n ssl_protocols TLSv1.2;\n ssl_prefer_server_ciphers on;\n ssl_ciphers \"ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES256-SHA384:ECDHE-RSA-AES256-SHA384\";\n ssl_certificate /etc/ssl/certs/logs.nginx.service.consul.crt;\n ssl_certificate_key /etc/ssl/private/logs.nginx.service.consul.key;\n location / {\n chunked_transfer_encoding off;\n proxy_connect_timeout 300;\n proxy_http_version 1.1;\n proxy_set_header Host $host:$server_port;\n proxy_set_header Connection \"\";\n proxy_pass http://storage/logs.fd.io/;\n server_name_in_redirect off;\n }\n location ~ (.*html.gz)$ {\n add_header Content-Encoding gzip;\n add_header Content-Type text/html;\n chunked_transfer_encoding off;\n proxy_connect_timeout 300;\n proxy_http_version 1.1;\n proxy_set_header Host $host:$server_port;\n proxy_set_header Connection \"\";\n proxy_pass http://storage/logs.fd.io/$1;\n server_name_in_redirect off;\n }\n location ~ (.*txt.gz|.*log.gz)$ {\n add_header Content-Encoding gzip;\n add_header Content-Type text/plain;\n chunked_transfer_encoding off;\n proxy_connect_timeout 300;\n proxy_http_version 1.1;\n proxy_set_header Host $host:$server_port;\n proxy_set_header Connection \"\";\n proxy_pass http://storage/logs.fd.io/$1;\n server_name_in_redirect off;\n }\n location ~ (.*xml.gz)$ {\n add_header Content-Encoding gzip;\n add_header Content-Type application/xml;\n chunked_transfer_encoding off;\n proxy_connect_timeout 300;\n proxy_http_version 1.1;\n proxy_set_header Host $host:$server_port;\n proxy_set_header Connection \"\";\n proxy_pass http://storage/logs.fd.io/$1;\n server_name_in_redirect off;\n }\n }\n EOH\n destination = \"custom/logs.conf\"\n }\n template {\n data = \u003c\u003cEOH\n server {\n listen 443 ssl;\n server_name docs.nginx.service.consul;\n keepalive_timeout 70;\n ssl_session_cache shared:SSL:10m;\n ssl_session_timeout 10m;\n ssl_protocols TLSv1.2;\n ssl_prefer_server_ciphers on;\n ssl_ciphers \"ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES256-SHA384:ECDHE-RSA-AES256-SHA384\";\n ssl_certificate /etc/ssl/certs/docs.nginx.service.consul.crt;\n ssl_certificate_key /etc/ssl/private/docs.nginx.service.consul.key;\n location / {\n chunked_transfer_encoding off;\n proxy_connect_timeout 300;\n proxy_http_version 1.1;\n proxy_set_header Host $host:$server_port;\n proxy_set_header Connection \"\";\n proxy_pass http://storage/docs.fd.io/;\n server_name_in_redirect off;\n }\n }\n EOH\n destination = \"custom/docs.conf\"\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/service.html\n #\n service {\n name = \"nginx\"\n port = \"https\"\n tags = [ \"docs\", \"logs\" ]\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources.html\n #\n resources {\n cpu = 2000\n memory = 4096\n network {\n mode = \"bridge\"\n port \"https\" {\n static = 443\n }\n }\n }\n }\n }\n}",
- "vars": {
- "datacenters": "yul1",
- "host_volume": "prod-volume-data1-1",
- "job_name": "prod-nginx",
- "use_host_volume": "true"
- }
- },
- "sensitive_attributes": []
- }
- ]
- },
- {
- "module": "module.nginx",
- "mode": "managed",
- "type": "nomad_job",
- "name": "nomad_job_nginx",
- "provider": "provider[\"registry.terraform.io/hashicorp/nomad\"].yul1",
- "instances": [
- {
- "schema_version": 0,
- "attributes": {
- "allocation_ids": [
- "8a81e41d-911d-e083-9bcd-b5b3c70b33fa"
- ],
- "datacenters": [
- "yul1"
- ],
- "deployment_id": "",
- "deployment_status": "",
- "deregister_on_destroy": true,
- "deregister_on_id_change": true,
- "detach": false,
- "id": "prod-nginx",
- "jobspec": "job \"prod-nginx\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"yul1\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers.html\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 0\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = false\n\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 0\n }\n\n # The reschedule stanza specifies the group's rescheduling strategy. If\n # specified at the job level, the configuration will apply to all groups\n # within the job. If the reschedule stanza is present on both the job and the\n # group, they are merged with the group stanza taking the highest precedence\n # and then the job.\n reschedule {\n delay = \"30s\"\n delay_function = \"constant\"\n unlimited = true\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group.html\n #\n group \"prod-group1-nginx\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 1\n\n # https://www.nomadproject.io/docs/job-specification/volume\n \n volume \"prod-volume1-nginx\" {\n type = \"host\"\n read_only = false\n source = \"prod-volume-data1-1\"\n }\n \n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task.html\n #\n task \"prod-task1-nginx\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"nginx:stable\"\n port_map {\n https = 443\n }\n privileged = false\n volumes = [\n \"/etc/ssl/certs/docs.nginx.service.consul.crt:/etc/ssl/certs/docs.nginx.service.consul.crt\",\n \"/etc/ssl/private/docs.nginx.service.consul.key:/etc/ssl/private/docs.nginx.service.consul.key\",\n \"/etc/ssl/certs/logs.nginx.service.consul.crt:/etc/ssl/certs/logs.nginx.service.consul.crt\",\n \"/etc/ssl/private/logs.nginx.service.consul.key:/etc/ssl/private/logs.nginx.service.consul.key\",\n \"custom/upstream.conf:/etc/nginx/conf.d/upstream.conf\",\n \"custom/logs.conf:/etc/nginx/conf.d/logs.conf\",\n \"custom/docs.conf:/etc/nginx/conf.d/docs.conf\"\n ]\n }\n\n # The \"template\" stanza instructs Nomad to manage a template, such as\n # a configuration file or script. This template can optionally pull data\n # from Consul or Vault to populate runtime configuration data.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/template.html\n #\n template {\n data = \u003c\u003cEOH\n upstream storage {\n {{ range service \"storage\" }}\n server {{ .Address }}:{{ .Port }};\n {{ end }}\n }\n EOH\n destination = \"custom/upstream.conf\"\n }\n template {\n data = \u003c\u003cEOH\n server {\n listen 443 ssl default_server;\n server_name logs.nginx.service.consul;\n keepalive_timeout 70;\n ssl_session_cache shared:SSL:10m;\n ssl_session_timeout 10m;\n ssl_protocols TLSv1.2;\n ssl_prefer_server_ciphers on;\n ssl_ciphers \"ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES256-SHA384:ECDHE-RSA-AES256-SHA384\";\n ssl_certificate /etc/ssl/certs/logs.nginx.service.consul.crt;\n ssl_certificate_key /etc/ssl/private/logs.nginx.service.consul.key;\n location / {\n chunked_transfer_encoding off;\n proxy_connect_timeout 300;\n proxy_http_version 1.1;\n proxy_set_header Host $host:$server_port;\n proxy_set_header Connection \"\";\n proxy_pass http://storage/logs.fd.io/;\n server_name_in_redirect off;\n }\n location ~ (.*html.gz)$ {\n add_header Content-Encoding gzip;\n add_header Content-Type text/html;\n chunked_transfer_encoding off;\n proxy_connect_timeout 300;\n proxy_http_version 1.1;\n proxy_set_header Host $host:$server_port;\n proxy_set_header Connection \"\";\n proxy_pass http://storage/logs.fd.io/$1;\n server_name_in_redirect off;\n }\n location ~ (.*txt.gz|.*log.gz)$ {\n add_header Content-Encoding gzip;\n add_header Content-Type text/plain;\n chunked_transfer_encoding off;\n proxy_connect_timeout 300;\n proxy_http_version 1.1;\n proxy_set_header Host $host:$server_port;\n proxy_set_header Connection \"\";\n proxy_pass http://storage/logs.fd.io/$1;\n server_name_in_redirect off;\n }\n location ~ (.*xml.gz)$ {\n add_header Content-Encoding gzip;\n add_header Content-Type application/xml;\n chunked_transfer_encoding off;\n proxy_connect_timeout 300;\n proxy_http_version 1.1;\n proxy_set_header Host $host:$server_port;\n proxy_set_header Connection \"\";\n proxy_pass http://storage/logs.fd.io/$1;\n server_name_in_redirect off;\n }\n }\n EOH\n destination = \"custom/logs.conf\"\n }\n template {\n data = \u003c\u003cEOH\n server {\n listen 443 ssl;\n server_name docs.nginx.service.consul;\n keepalive_timeout 70;\n ssl_session_cache shared:SSL:10m;\n ssl_session_timeout 10m;\n ssl_protocols TLSv1.2;\n ssl_prefer_server_ciphers on;\n ssl_ciphers \"ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES256-SHA384:ECDHE-RSA-AES256-SHA384\";\n ssl_certificate /etc/ssl/certs/docs.nginx.service.consul.crt;\n ssl_certificate_key /etc/ssl/private/docs.nginx.service.consul.key;\n location / {\n chunked_transfer_encoding off;\n proxy_connect_timeout 300;\n proxy_http_version 1.1;\n proxy_set_header Host $host:$server_port;\n proxy_set_header Connection \"\";\n proxy_pass http://storage/docs.fd.io/;\n server_name_in_redirect off;\n }\n }\n EOH\n destination = \"custom/docs.conf\"\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/service.html\n #\n service {\n name = \"nginx\"\n port = \"https\"\n tags = [ \"docs\", \"logs\" ]\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources.html\n #\n resources {\n cpu = 2000\n memory = 4096\n network {\n mode = \"bridge\"\n port \"https\" {\n static = 443\n }\n }\n }\n }\n }\n}",
- "json": null,
- "modify_index": "8993958",
- "name": "prod-nginx",
- "namespace": "default",
- "policy_override": null,
- "purge_on_destroy": null,
- "region": "global",
- "task_groups": [
- {
- "count": 1,
- "meta": {},
- "name": "prod-group1-nginx",
- "task": [
- {
- "driver": "docker",
- "meta": {},
- "name": "prod-task1-nginx",
- "volume_mounts": []
- }
- ],
- "volumes": [
- {
- "name": "prod-volume1-nginx",
- "read_only": false,
- "source": "prod-volume-data1-1",
- "type": "host"
- }
- ]
- }
- ],
- "type": "service"
- },
- "sensitive_attributes": [],
- "private": "bnVsbA==",
- "dependencies": [
- "module.nginx.data.template_file.nomad_job_nginx"
- ]
- }
- ]
- },
- {
- "module": "module.prometheus",
- "mode": "data",
- "type": "template_file",
- "name": "nomad_job_prometheus",
- "provider": "provider[\"registry.terraform.io/hashicorp/template\"]",
- "instances": [
- {
- "schema_version": 0,
- "attributes": {
- "filename": null,
- "id": "170256de3b14e7562d5d4b196864b5c0ad562cf124c5866d85bd81a3f35bcc27",
- "rendered": "job \"prod-prometheus\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"yul1\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 1\n\n health_check = \"checks\"\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 1\n\n # Specifies if the job should auto-promote to the canary version when all\n # canaries become healthy during a deployment. Defaults to false which means\n # canaries must be manually updated with the nomad deployment promote\n # command.\n auto_promote = true\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = true\n\n }\n\n # The reschedule stanza specifies the group's rescheduling strategy. If\n # specified at the job level, the configuration will apply to all groups\n # within the job. If the reschedule stanza is present on both the job and the\n # group, they are merged with the group stanza taking the highest precedence\n # and then the job.\n reschedule {\n delay = \"30s\"\n delay_function = \"constant\"\n unlimited = true\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group\n #\n group \"prod-group1-prometheus\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 4\n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The volume stanza allows the group to specify that it requires a given\n # volume from the cluster.\n #\n # For more information and examples on the \"volume\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/volume\n #\n \n volume \"prod-volume1-prometheus\" {\n type = \"host\"\n read_only = false\n source = \"prod-volume-data1-1\"\n }\n \n\n # The constraint allows restricting the set of eligible nodes. Constraints\n # may filter on attributes or client metadata.\n #\n # For more information and examples on the \"volume\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/constraint\n #\n constraint {\n attribute = \"${attr.cpu.arch}\"\n operator = \"!=\"\n value = \"arm64\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task\n #\n task \"prod-task1-prometheus\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"exec\"\n\n \n volume_mount {\n volume = \"prod-volume1-prometheus\"\n destination = \"/data/\"\n read_only = false\n }\n \n\n \n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n command = \"local/prometheus-2.28.1.linux-amd64/prometheus\"\n args = [\n \"--config.file=secrets/prometheus.yml\",\n \"--storage.tsdb.path=/data/prometheus/\",\n \"--storage.tsdb.retention.time=7d\"\n ]\n }\n\n # The artifact stanza instructs Nomad to fetch and unpack a remote resource,\n # such as a file, tarball, or binary. Nomad downloads artifacts using the\n # popular go-getter library, which permits downloading artifacts from a\n # variety of locations using a URL as the input source.\n #\n # For more information and examples on the \"artifact\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/artifact\n #\n artifact {\n source = \"https://github.com/prometheus/prometheus/releases/download/v2.28.1/prometheus-2.28.1.linux-amd64.tar.gz\"\n }\n\n # The \"template\" stanza instructs Nomad to manage a template, such as\n # a configuration file or script. This template can optionally pull data\n # from Consul or Vault to populate runtime configuration data.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/template\n #\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/alerts.yml\"\n left_delimiter = \"{{{\"\n right_delimiter = \"}}}\"\n data = \u003c\u003cEOH\n---\ngroups:\n- name: \"Jenkins Job Health Exporter\"\n rules:\n - alert: JenkinsJobHealthExporterFailures\n expr: jenkins_job_failure{id=~\".*\"} \u003e jenkins_job_success{id=~\".*\"}\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Jenkins Job Health detected high failure rate on jenkins jobs.\"\n description: \"Job: {{ $labels.id }}\"\n - alert: JenkinsJobHealthExporterUnstable\n expr: jenkins_job_unstable{id=~\".*\"} \u003e jenkins_job_success{id=~\".*\"}\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Jenkins Job Health detected high unstable rate on jenkins jobs.\"\n description: \"Job: {{ $labels.id }}\"\n- name: \"Consul\"\n rules:\n - alert: ConsulServiceHealthcheckFailed\n expr: consul_catalog_service_node_healthy == 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Consul service healthcheck failed (instance {{ $labels.instance }}).\"\n description: \"Service: `{{ $labels.service_name }}` Healthcheck: `{{ $labels.service_id }}`.\"\n - alert: ConsulMissingMasterNode\n expr: consul_raft_peers \u003c 3\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Consul missing master node (instance {{ $labels.instance }}).\"\n description: \"Numbers of consul raft peers should be 3, in order to preserve quorum.\"\n - alert: ConsulAgentUnhealthy\n expr: consul_health_node_status{status=\"critical\"} == 1\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Consul agent unhealthy (instance {{ $labels.instance }}).\"\n description: \"A Consul agent is down.\"\n- name: \"Hosts\"\n rules:\n - alert: NodeDown\n expr: up == 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus target missing (instance {{ $labels.instance }}).\"\n description: \"A Prometheus target has disappeared. An exporter might be crashed.\"\n - alert: HostOutOfMemory\n expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 \u003c 10\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Host out of memory (instance {{ $labels.instance }}).\"\n description: \"Node memory is filling up (\u003c 10% left).\"\n - alert: HostOomKillDetected\n expr: increase(node_vmstat_oom_kill[1m]) \u003e 0\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Host OOM kill detected (instance {{ $labels.instance }}).\"\n description: \"OOM kill detected.\"\n - alert: HostMemoryUnderMemoryPressure\n expr: rate(node_vmstat_pgmajfault[1m]) \u003e 1000\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Host memory under memory pressure (instance {{ $labels.instance }}).\"\n description: \"The node is under heavy memory pressure. High rate of major page faults.\"\n - alert: HostOutOfDiskSpace\n expr: (node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes \u003c 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Host out of disk space (instance {{ $labels.instance }}).\"\n description: \"Disk is almost full (\u003c 10% left).\"\n - alert: HostRaidDiskFailure\n expr: node_md_disks{state=\"failed\"} \u003e 0\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Host RAID disk failure (instance {{ $labels.instance }}).\"\n description: \"At least one device in RAID array on {{ $labels.instance }} failed. Array {{ $labels.md_device }} needs attention and possibly a disk swap.\"\n - alert: HostConntrackLimit\n expr: node_nf_conntrack_entries / node_nf_conntrack_entries_limit \u003e 0.8\n for: 5m\n labels:\n severity: warning\n annotations:\n summary: \"Host conntrack limit (instance {{ $labels.instance }}).\"\n description: \"The number of conntrack is approching limit.\"\n - alert: HostNetworkInterfaceSaturated\n expr: (rate(node_network_receive_bytes_total{device!~\"^tap.*\"}[1m]) + rate(node_network_transmit_bytes_total{device!~\"^tap.*\"}[1m])) / node_network_speed_bytes{device!~\"^tap.*\"} \u003e 0.8\n for: 1m\n labels:\n severity: warning\n annotations:\n summary: \"Host Network Interface Saturated (instance {{ $labels.instance }}).\"\n description: \"The network interface {{ $labels.interface }} on {{ $labels.instance }} is getting overloaded.\"\n - alert: HostSystemdServiceCrashed\n expr: node_systemd_unit_state{state=\"failed\"} == 1\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Host SystemD service crashed (instance {{ $labels.instance }}).\"\n description: \"SystemD service crashed.\"\n - alert: HostEdacCorrectableErrorsDetected\n expr: increase(node_edac_correctable_errors_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: info\n annotations:\n summary: \"Host EDAC Correctable Errors detected (instance {{ $labels.instance }}).\"\n description: '{{ $labels.instance }} has had {{ printf \"%.0f\" $value }} correctable memory errors reported by EDAC in the last 5 minutes.'\n - alert: HostEdacUncorrectableErrorsDetected\n expr: node_edac_uncorrectable_errors_total \u003e 0\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Host EDAC Uncorrectable Errors detected (instance {{ $labels.instance }}).\"\n description: '{{ $labels.instance }} has had {{ printf \"%.0f\" $value }} uncorrectable memory errors reported by EDAC in the last 5 minutes.'\n- name: \"Min.io\"\n rules:\n - alert: MinioDiskOffline\n expr: minio_offline_disks \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Minio disk offline (instance {{ $labels.instance }})\"\n description: \"Minio disk is offline.\"\n - alert: MinioStorageSpaceExhausted\n expr: minio_disk_storage_free_bytes / 1024 / 1024 / 1024 \u003c 10\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Minio storage space exhausted (instance {{ $labels.instance }}).\"\n description: \"Minio storage space is low (\u003c 10 GB).\"\n- name: \"Prometheus\"\n rules:\n - alert: PrometheusConfigurationReloadFailure\n expr: prometheus_config_last_reload_successful != 1\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Prometheus configuration reload failure (instance {{ $labels.instance }}).\"\n description: \"Prometheus configuration reload error.\"\n - alert: PrometheusTooManyRestarts\n expr: changes(process_start_time_seconds{job=~\"prometheus|pushgateway|alertmanager\"}[15m]) \u003e 2\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Prometheus too many restarts (instance {{ $labels.instance }}).\"\n description: \"Prometheus has restarted more than twice in the last 15 minutes. It might be crashlooping.\"\n - alert: PrometheusAlertmanagerConfigurationReloadFailure\n expr: alertmanager_config_last_reload_successful != 1\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Prometheus AlertManager configuration reload failure (instance {{ $labels.instance }}).\"\n description: \"AlertManager configuration reload error.\"\n - alert: PrometheusRuleEvaluationFailures\n expr: increase(prometheus_rule_evaluation_failures_total[3m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus rule evaluation failures (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} rule evaluation failures, leading to potentially ignored alerts.\"\n - alert: PrometheusTargetScrapingSlow\n expr: prometheus_target_interval_length_seconds{quantile=\"0.9\"} \u003e 60\n for: 5m\n labels:\n severity: warning\n annotations:\n summary: \"Prometheus target scraping slow (instance {{ $labels.instance }}).\"\n description: \"Prometheus is scraping exporters slowly.\"\n - alert: PrometheusTsdbCompactionsFailed\n expr: increase(prometheus_tsdb_compactions_failed_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus TSDB compactions failed (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} TSDB compactions failures.\"\n - alert: PrometheusTsdbHeadTruncationsFailed\n expr: increase(prometheus_tsdb_head_truncations_failed_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus TSDB head truncations failed (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} TSDB head truncation failures.\"\n - alert: PrometheusTsdbWalCorruptions\n expr: increase(prometheus_tsdb_wal_corruptions_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus TSDB WAL corruptions (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} TSDB WAL corruptions.\"\n - alert: PrometheusTsdbWalTruncationsFailed\n expr: increase(prometheus_tsdb_wal_truncations_failed_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus TSDB WAL truncations failed (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} TSDB WAL truncation failures.\"\nEOH\n }\n\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/prometheus.yml\"\n data = \u003c\u003cEOH\n---\nglobal:\n scrape_interval: 5s\n scrape_timeout: 5s\n evaluation_interval: 5s\n\nalerting:\n alertmanagers:\n - consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'alertmanager' ]\n\nrule_files:\n - 'alerts.yml'\n\nscrape_configs:\n\n - job_name: 'Nomad Cluster'\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'nomad-client', 'nomad' ]\n relabel_configs:\n - source_labels: [__meta_consul_tags]\n regex: '(.*)http(.*)'\n action: keep\n metrics_path: /v1/metrics\n params:\n format: [ 'prometheus' ]\n\n - job_name: 'Consul Cluster'\n static_configs:\n - targets: [ '10.30.51.22:8500' ]\n - targets: [ '10.30.51.24:8500' ]\n - targets: [ '10.30.51.25:8500' ]\n - targets: [ '10.30.51.26:8500' ]\n - targets: [ '10.30.51.28:8500' ]\n - targets: [ '10.30.51.29:8500' ]\n - targets: [ '10.30.51.30:8500' ]\n - targets: [ '10.30.51.39:8500' ]\n - targets: [ '10.30.51.40:8500' ]\n - targets: [ '10.30.51.50:8500' ]\n - targets: [ '10.30.51.51:8500' ]\n - targets: [ '10.30.51.65:8500' ]\n - targets: [ '10.30.51.66:8500' ]\n - targets: [ '10.30.51.67:8500' ]\n - targets: [ '10.30.51.68:8500' ]\n - targets: [ '10.30.51.70:8500' ]\n - targets: [ '10.30.51.71:8500' ]\n - targets: [ '10.32.8.14:8500' ]\n - targets: [ '10.32.8.15:8500' ]\n - targets: [ '10.32.8.16:8500' ]\n - targets: [ '10.32.8.17:8500' ]\n metrics_path: /v1/agent/metrics\n params:\n format: [ 'prometheus' ]\n\n - job_name: 'Blackbox Exporter (icmp)'\n static_configs:\n - targets: [ 'gerrit.fd.io' ]\n - targets: [ 'jenkins.fd.io' ]\n - targets: [ '10.32.8.17' ]\n params:\n module: [ 'icmp_v4' ]\n relabel_configs:\n - source_labels: [__address__]\n target_label: __param_target\n - source_labels: [__param_target]\n target_label: instance\n - target_label: __address__\n replacement: localhost:9115\n metrics_path: /probe\n\n - job_name: 'Blackbox Exporter (http)'\n static_configs:\n - targets: [ 'gerrit.fd.io' ]\n - targets: [ 'jenkins.fd.io' ]\n params:\n module: [ 'http_2xx' ]\n relabel_configs:\n - source_labels: [__address__]\n target_label: __param_target\n - source_labels: [__param_target]\n target_label: instance\n - target_label: __address__\n replacement: localhost:9115\n metrics_path: /probe\n\n - job_name: 'Jenkins Job Health Exporter'\n static_configs:\n - targets: [ '10.30.51.22:9186' ]\n metric_relabel_configs:\n - source_labels: [ __name__ ]\n regex: '^(vpp.*|csit.*)_(success|failure|total|unstable|reqtime_ms)$'\n action: replace\n replacement: '$1'\n target_label: id\n - source_labels: [ __name__ ]\n regex: '^(vpp.*|csit.*)_(success|failure|total|unstable|reqtime_ms)$'\n replacement: 'jenkins_job_$2'\n target_label: __name__\n\n - job_name: 'Node Exporter'\n static_configs:\n - targets: [ '10.30.51.22:9100' ]\n - targets: [ '10.30.51.24:9100' ]\n - targets: [ '10.30.51.25:9100' ]\n - targets: [ '10.30.51.26:9100' ]\n - targets: [ '10.30.51.28:9100' ]\n - targets: [ '10.30.51.29:9100' ]\n - targets: [ '10.30.51.30:9100' ]\n - targets: [ '10.30.51.39:9100' ]\n - targets: [ '10.30.51.40:9100' ]\n - targets: [ '10.30.51.50:9100' ]\n - targets: [ '10.30.51.51:9100' ]\n - targets: [ '10.30.51.65:9100' ]\n - targets: [ '10.30.51.66:9100' ]\n - targets: [ '10.30.51.67:9100' ]\n - targets: [ '10.30.51.68:9100' ]\n - targets: [ '10.30.51.70:9100' ]\n - targets: [ '10.30.51.71:9100' ]\n - targets: [ '10.32.8.14:9100' ]\n - targets: [ '10.32.8.15:9100' ]\n - targets: [ '10.32.8.16:9100' ]\n - targets: [ '10.32.8.17:9100' ]\n\n - job_name: 'Alertmanager'\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'alertmanager' ]\n\n - job_name: 'Grafana'\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'grafana' ]\n\n - job_name: 'Prometheus'\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'prometheus' ]\n\n - job_name: 'Minio'\n bearer_token: eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJleHAiOjQ3NjQ1ODEzMzcsImlzcyI6InByb21ldGhldXMiLCJzdWIiOiJtaW5pbyJ9.oeTw3EIaiFmlDikrHXWiWXMH2vxLfDLkfjEC7G2N3M_keH_xyA_l2ofLLNYtopa_3GCEZnxLQdPuFZrmgpkDWg\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'storage' ]\n metrics_path: /minio/prometheus/metrics\nEOH\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/service\n #\n service {\n name = \"prometheus\"\n port = \"prometheus\"\n tags = [ \"prometheus${NOMAD_ALLOC_INDEX}\" ]\n check {\n name = \"Prometheus Check Live\"\n type = \"http\"\n path = \"/-/healthy\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources\n #\n resources {\n cpu = 2000\n memory = 8192\n # The network stanza specifies the networking requirements for the task\n # group, including the network mode and port allocations. When scheduling\n # jobs in Nomad they are provisioned across your fleet of machines along\n # with other jobs and services. Because you don't know in advance what host\n # your job will be provisioned on, Nomad will provide your tasks with\n # network configuration when they start up.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/network\n #\n network {\n port \"prometheus\" {\n static = 9090\n }\n }\n }\n }\n }\n}",
- "template": "job \"${job_name}\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"${datacenters}\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 1\n\n health_check = \"checks\"\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n%{ if use_canary }\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 1\n\n # Specifies if the job should auto-promote to the canary version when all\n # canaries become healthy during a deployment. Defaults to false which means\n # canaries must be manually updated with the nomad deployment promote\n # command.\n auto_promote = true\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = true\n%{ endif }\n }\n\n # The reschedule stanza specifies the group's rescheduling strategy. If\n # specified at the job level, the configuration will apply to all groups\n # within the job. If the reschedule stanza is present on both the job and the\n # group, they are merged with the group stanza taking the highest precedence\n # and then the job.\n reschedule {\n delay = \"30s\"\n delay_function = \"constant\"\n unlimited = true\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group\n #\n group \"prod-group1-${service_name}\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = ${group_count}\n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The volume stanza allows the group to specify that it requires a given\n # volume from the cluster.\n #\n # For more information and examples on the \"volume\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/volume\n #\n %{ if use_host_volume }\n volume \"prod-volume1-${service_name}\" {\n type = \"host\"\n read_only = false\n source = \"${host_volume}\"\n }\n %{ endif }\n\n # The constraint allows restricting the set of eligible nodes. Constraints\n # may filter on attributes or client metadata.\n #\n # For more information and examples on the \"volume\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/constraint\n #\n constraint {\n attribute = \"$${attr.cpu.arch}\"\n operator = \"!=\"\n value = \"arm64\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task\n #\n task \"prod-task1-${service_name}\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"exec\"\n\n %{ if use_host_volume }\n volume_mount {\n volume = \"prod-volume1-${service_name}\"\n destination = \"${data_dir}\"\n read_only = false\n }\n %{ endif }\n\n %{ if use_vault_provider }\n vault {\n policies = \"${vault_kv_policy_name}\"\n }\n %{ endif }\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n command = \"local/prometheus-${version}.linux-amd64/prometheus\"\n args = [\n \"--config.file=secrets/prometheus.yml\",\n \"--storage.tsdb.path=${data_dir}prometheus/\",\n \"--storage.tsdb.retention.time=7d\"\n ]\n }\n\n # The artifact stanza instructs Nomad to fetch and unpack a remote resource,\n # such as a file, tarball, or binary. Nomad downloads artifacts using the\n # popular go-getter library, which permits downloading artifacts from a\n # variety of locations using a URL as the input source.\n #\n # For more information and examples on the \"artifact\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/artifact\n #\n artifact {\n source = \"${url}\"\n }\n\n # The \"template\" stanza instructs Nomad to manage a template, such as\n # a configuration file or script. This template can optionally pull data\n # from Consul or Vault to populate runtime configuration data.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/template\n #\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/alerts.yml\"\n left_delimiter = \"{{{\"\n right_delimiter = \"}}}\"\n data = \u003c\u003cEOH\n---\ngroups:\n- name: \"Jenkins Job Health Exporter\"\n rules:\n - alert: JenkinsJobHealthExporterFailures\n expr: jenkins_job_failure{id=~\".*\"} \u003e jenkins_job_success{id=~\".*\"}\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Jenkins Job Health detected high failure rate on jenkins jobs.\"\n description: \"Job: {{ $labels.id }}\"\n - alert: JenkinsJobHealthExporterUnstable\n expr: jenkins_job_unstable{id=~\".*\"} \u003e jenkins_job_success{id=~\".*\"}\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Jenkins Job Health detected high unstable rate on jenkins jobs.\"\n description: \"Job: {{ $labels.id }}\"\n- name: \"Consul\"\n rules:\n - alert: ConsulServiceHealthcheckFailed\n expr: consul_catalog_service_node_healthy == 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Consul service healthcheck failed (instance {{ $labels.instance }}).\"\n description: \"Service: `{{ $labels.service_name }}` Healthcheck: `{{ $labels.service_id }}`.\"\n - alert: ConsulMissingMasterNode\n expr: consul_raft_peers \u003c 3\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Consul missing master node (instance {{ $labels.instance }}).\"\n description: \"Numbers of consul raft peers should be 3, in order to preserve quorum.\"\n - alert: ConsulAgentUnhealthy\n expr: consul_health_node_status{status=\"critical\"} == 1\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Consul agent unhealthy (instance {{ $labels.instance }}).\"\n description: \"A Consul agent is down.\"\n- name: \"Hosts\"\n rules:\n - alert: NodeDown\n expr: up == 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus target missing (instance {{ $labels.instance }}).\"\n description: \"A Prometheus target has disappeared. An exporter might be crashed.\"\n - alert: HostOutOfMemory\n expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 \u003c 10\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Host out of memory (instance {{ $labels.instance }}).\"\n description: \"Node memory is filling up (\u003c 10% left).\"\n - alert: HostOomKillDetected\n expr: increase(node_vmstat_oom_kill[1m]) \u003e 0\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Host OOM kill detected (instance {{ $labels.instance }}).\"\n description: \"OOM kill detected.\"\n - alert: HostMemoryUnderMemoryPressure\n expr: rate(node_vmstat_pgmajfault[1m]) \u003e 1000\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Host memory under memory pressure (instance {{ $labels.instance }}).\"\n description: \"The node is under heavy memory pressure. High rate of major page faults.\"\n - alert: HostOutOfDiskSpace\n expr: (node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes \u003c 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Host out of disk space (instance {{ $labels.instance }}).\"\n description: \"Disk is almost full (\u003c 10% left).\"\n - alert: HostRaidDiskFailure\n expr: node_md_disks{state=\"failed\"} \u003e 0\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Host RAID disk failure (instance {{ $labels.instance }}).\"\n description: \"At least one device in RAID array on {{ $labels.instance }} failed. Array {{ $labels.md_device }} needs attention and possibly a disk swap.\"\n - alert: HostConntrackLimit\n expr: node_nf_conntrack_entries / node_nf_conntrack_entries_limit \u003e 0.8\n for: 5m\n labels:\n severity: warning\n annotations:\n summary: \"Host conntrack limit (instance {{ $labels.instance }}).\"\n description: \"The number of conntrack is approching limit.\"\n - alert: HostNetworkInterfaceSaturated\n expr: (rate(node_network_receive_bytes_total{device!~\"^tap.*\"}[1m]) + rate(node_network_transmit_bytes_total{device!~\"^tap.*\"}[1m])) / node_network_speed_bytes{device!~\"^tap.*\"} \u003e 0.8\n for: 1m\n labels:\n severity: warning\n annotations:\n summary: \"Host Network Interface Saturated (instance {{ $labels.instance }}).\"\n description: \"The network interface {{ $labels.interface }} on {{ $labels.instance }} is getting overloaded.\"\n - alert: HostSystemdServiceCrashed\n expr: node_systemd_unit_state{state=\"failed\"} == 1\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Host SystemD service crashed (instance {{ $labels.instance }}).\"\n description: \"SystemD service crashed.\"\n - alert: HostEdacCorrectableErrorsDetected\n expr: increase(node_edac_correctable_errors_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: info\n annotations:\n summary: \"Host EDAC Correctable Errors detected (instance {{ $labels.instance }}).\"\n description: '{{ $labels.instance }} has had {{ printf \"%.0f\" $value }} correctable memory errors reported by EDAC in the last 5 minutes.'\n - alert: HostEdacUncorrectableErrorsDetected\n expr: node_edac_uncorrectable_errors_total \u003e 0\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Host EDAC Uncorrectable Errors detected (instance {{ $labels.instance }}).\"\n description: '{{ $labels.instance }} has had {{ printf \"%.0f\" $value }} uncorrectable memory errors reported by EDAC in the last 5 minutes.'\n- name: \"Min.io\"\n rules:\n - alert: MinioDiskOffline\n expr: minio_offline_disks \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Minio disk offline (instance {{ $labels.instance }})\"\n description: \"Minio disk is offline.\"\n - alert: MinioStorageSpaceExhausted\n expr: minio_disk_storage_free_bytes / 1024 / 1024 / 1024 \u003c 10\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Minio storage space exhausted (instance {{ $labels.instance }}).\"\n description: \"Minio storage space is low (\u003c 10 GB).\"\n- name: \"Prometheus\"\n rules:\n - alert: PrometheusConfigurationReloadFailure\n expr: prometheus_config_last_reload_successful != 1\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Prometheus configuration reload failure (instance {{ $labels.instance }}).\"\n description: \"Prometheus configuration reload error.\"\n - alert: PrometheusTooManyRestarts\n expr: changes(process_start_time_seconds{job=~\"prometheus|pushgateway|alertmanager\"}[15m]) \u003e 2\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Prometheus too many restarts (instance {{ $labels.instance }}).\"\n description: \"Prometheus has restarted more than twice in the last 15 minutes. It might be crashlooping.\"\n - alert: PrometheusAlertmanagerConfigurationReloadFailure\n expr: alertmanager_config_last_reload_successful != 1\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Prometheus AlertManager configuration reload failure (instance {{ $labels.instance }}).\"\n description: \"AlertManager configuration reload error.\"\n - alert: PrometheusRuleEvaluationFailures\n expr: increase(prometheus_rule_evaluation_failures_total[3m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus rule evaluation failures (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} rule evaluation failures, leading to potentially ignored alerts.\"\n - alert: PrometheusTargetScrapingSlow\n expr: prometheus_target_interval_length_seconds{quantile=\"0.9\"} \u003e 60\n for: 5m\n labels:\n severity: warning\n annotations:\n summary: \"Prometheus target scraping slow (instance {{ $labels.instance }}).\"\n description: \"Prometheus is scraping exporters slowly.\"\n - alert: PrometheusTsdbCompactionsFailed\n expr: increase(prometheus_tsdb_compactions_failed_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus TSDB compactions failed (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} TSDB compactions failures.\"\n - alert: PrometheusTsdbHeadTruncationsFailed\n expr: increase(prometheus_tsdb_head_truncations_failed_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus TSDB head truncations failed (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} TSDB head truncation failures.\"\n - alert: PrometheusTsdbWalCorruptions\n expr: increase(prometheus_tsdb_wal_corruptions_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus TSDB WAL corruptions (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} TSDB WAL corruptions.\"\n - alert: PrometheusTsdbWalTruncationsFailed\n expr: increase(prometheus_tsdb_wal_truncations_failed_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus TSDB WAL truncations failed (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} TSDB WAL truncation failures.\"\nEOH\n }\n\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/prometheus.yml\"\n data = \u003c\u003cEOH\n---\nglobal:\n scrape_interval: 5s\n scrape_timeout: 5s\n evaluation_interval: 5s\n\nalerting:\n alertmanagers:\n - consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'alertmanager' ]\n\nrule_files:\n - 'alerts.yml'\n\nscrape_configs:\n\n - job_name: 'Nomad Cluster'\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'nomad-client', 'nomad' ]\n relabel_configs:\n - source_labels: [__meta_consul_tags]\n regex: '(.*)http(.*)'\n action: keep\n metrics_path: /v1/metrics\n params:\n format: [ 'prometheus' ]\n\n - job_name: 'Consul Cluster'\n static_configs:\n - targets: [ '10.30.51.22:8500' ]\n - targets: [ '10.30.51.24:8500' ]\n - targets: [ '10.30.51.25:8500' ]\n - targets: [ '10.30.51.26:8500' ]\n - targets: [ '10.30.51.28:8500' ]\n - targets: [ '10.30.51.29:8500' ]\n - targets: [ '10.30.51.30:8500' ]\n - targets: [ '10.30.51.39:8500' ]\n - targets: [ '10.30.51.40:8500' ]\n - targets: [ '10.30.51.50:8500' ]\n - targets: [ '10.30.51.51:8500' ]\n - targets: [ '10.30.51.65:8500' ]\n - targets: [ '10.30.51.66:8500' ]\n - targets: [ '10.30.51.67:8500' ]\n - targets: [ '10.30.51.68:8500' ]\n - targets: [ '10.30.51.70:8500' ]\n - targets: [ '10.30.51.71:8500' ]\n - targets: [ '10.32.8.14:8500' ]\n - targets: [ '10.32.8.15:8500' ]\n - targets: [ '10.32.8.16:8500' ]\n - targets: [ '10.32.8.17:8500' ]\n metrics_path: /v1/agent/metrics\n params:\n format: [ 'prometheus' ]\n\n - job_name: 'Blackbox Exporter (icmp)'\n static_configs:\n - targets: [ 'gerrit.fd.io' ]\n - targets: [ 'jenkins.fd.io' ]\n - targets: [ '10.32.8.17' ]\n params:\n module: [ 'icmp_v4' ]\n relabel_configs:\n - source_labels: [__address__]\n target_label: __param_target\n - source_labels: [__param_target]\n target_label: instance\n - target_label: __address__\n replacement: localhost:9115\n metrics_path: /probe\n\n - job_name: 'Blackbox Exporter (http)'\n static_configs:\n - targets: [ 'gerrit.fd.io' ]\n - targets: [ 'jenkins.fd.io' ]\n params:\n module: [ 'http_2xx' ]\n relabel_configs:\n - source_labels: [__address__]\n target_label: __param_target\n - source_labels: [__param_target]\n target_label: instance\n - target_label: __address__\n replacement: localhost:9115\n metrics_path: /probe\n\n - job_name: 'Jenkins Job Health Exporter'\n static_configs:\n - targets: [ '10.30.51.22:9186' ]\n metric_relabel_configs:\n - source_labels: [ __name__ ]\n regex: '^(vpp.*|csit.*)_(success|failure|total|unstable|reqtime_ms)$'\n action: replace\n replacement: '$1'\n target_label: id\n - source_labels: [ __name__ ]\n regex: '^(vpp.*|csit.*)_(success|failure|total|unstable|reqtime_ms)$'\n replacement: 'jenkins_job_$2'\n target_label: __name__\n\n - job_name: 'Node Exporter'\n static_configs:\n - targets: [ '10.30.51.22:9100' ]\n - targets: [ '10.30.51.24:9100' ]\n - targets: [ '10.30.51.25:9100' ]\n - targets: [ '10.30.51.26:9100' ]\n - targets: [ '10.30.51.28:9100' ]\n - targets: [ '10.30.51.29:9100' ]\n - targets: [ '10.30.51.30:9100' ]\n - targets: [ '10.30.51.39:9100' ]\n - targets: [ '10.30.51.40:9100' ]\n - targets: [ '10.30.51.50:9100' ]\n - targets: [ '10.30.51.51:9100' ]\n - targets: [ '10.30.51.65:9100' ]\n - targets: [ '10.30.51.66:9100' ]\n - targets: [ '10.30.51.67:9100' ]\n - targets: [ '10.30.51.68:9100' ]\n - targets: [ '10.30.51.70:9100' ]\n - targets: [ '10.30.51.71:9100' ]\n - targets: [ '10.32.8.14:9100' ]\n - targets: [ '10.32.8.15:9100' ]\n - targets: [ '10.32.8.16:9100' ]\n - targets: [ '10.32.8.17:9100' ]\n\n - job_name: 'Alertmanager'\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'alertmanager' ]\n\n - job_name: 'Grafana'\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'grafana' ]\n\n - job_name: 'Prometheus'\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'prometheus' ]\n\n - job_name: 'Minio'\n bearer_token: eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJleHAiOjQ3NjQ1ODEzMzcsImlzcyI6InByb21ldGhldXMiLCJzdWIiOiJtaW5pbyJ9.oeTw3EIaiFmlDikrHXWiWXMH2vxLfDLkfjEC7G2N3M_keH_xyA_l2ofLLNYtopa_3GCEZnxLQdPuFZrmgpkDWg\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'storage' ]\n metrics_path: /minio/prometheus/metrics\nEOH\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/service\n #\n service {\n name = \"${service_name}\"\n port = \"${service_name}\"\n tags = [ \"${service_name}$${NOMAD_ALLOC_INDEX}\" ]\n check {\n name = \"Prometheus Check Live\"\n type = \"http\"\n path = \"/-/healthy\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources\n #\n resources {\n cpu = ${cpu}\n memory = ${mem}\n # The network stanza specifies the networking requirements for the task\n # group, including the network mode and port allocations. When scheduling\n # jobs in Nomad they are provisioned across your fleet of machines along\n # with other jobs and services. Because you don't know in advance what host\n # your job will be provisioned on, Nomad will provide your tasks with\n # network configuration when they start up.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/network\n #\n network {\n port \"${service_name}\" {\n static = ${port}\n }\n }\n }\n }\n }\n}",
- "vars": {
- "cpu": "2000",
- "data_dir": "/data/",
- "datacenters": "yul1",
- "group_count": "4",
- "host_volume": "prod-volume-data1-1",
- "job_name": "prod-prometheus",
- "mem": "8192",
- "port": "9090",
- "service_name": "prometheus",
- "url": "https://github.com/prometheus/prometheus/releases/download/v2.28.1/prometheus-2.28.1.linux-amd64.tar.gz",
- "use_canary": "true",
- "use_host_volume": "true",
- "use_vault_provider": "false",
- "version": "2.28.1"
- }
- },
- "sensitive_attributes": []
- }
- ]
- },
- {
- "module": "module.prometheus",
- "mode": "managed",
- "type": "nomad_job",
- "name": "nomad_job_prometheus",
- "provider": "provider[\"registry.terraform.io/hashicorp/nomad\"].yul1",
- "instances": [
- {
- "schema_version": 0,
- "attributes": {
- "allocation_ids": [
- "9b7311cc-95ba-fead-8c92-91148f48d218",
- "5564fb46-6a89-1e92-7bcd-7f5fa55c42e8",
- "519e4ea1-f088-069c-3bc2-eb6de1b36b9e",
- "6f144324-2ee0-c1fd-0f71-c07cc1fda099",
- "6776d9ca-70ba-129f-05a9-ca6fccdb6289",
- "0fb06047-62e0-48e4-e352-63b4602407f2",
- "ddb72c78-1f3f-2d1e-0157-7b7bb05aaa4a",
- "67dfb1da-36a3-e442-5c66-8d6340f579c3"
- ],
- "datacenters": [
- "yul1"
- ],
- "deployment_id": "88fd2d42-3b5a-8e1e-3047-7a4f93417fd3",
- "deployment_status": "successful",
- "deregister_on_destroy": true,
- "deregister_on_id_change": true,
- "detach": false,
- "id": "prod-prometheus",
- "jobspec": "job \"prod-prometheus\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"yul1\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers\n #\n type = \"service\"\n\n update {\n # The \"max_parallel\" parameter specifies the maximum number of updates to\n # perform in parallel. In this case, this specifies to update a single task\n # at a time.\n max_parallel = 1\n\n health_check = \"checks\"\n\n # The \"min_healthy_time\" parameter specifies the minimum time the allocation\n # must be in the healthy state before it is marked as healthy and unblocks\n # further allocations from being updated.\n min_healthy_time = \"10s\"\n\n # The \"healthy_deadline\" parameter specifies the deadline in which the\n # allocation must be marked as healthy after which the allocation is\n # automatically transitioned to unhealthy. Transitioning to unhealthy will\n # fail the deployment and potentially roll back the job if \"auto_revert\" is\n # set to true.\n healthy_deadline = \"3m\"\n\n # The \"progress_deadline\" parameter specifies the deadline in which an\n # allocation must be marked as healthy. The deadline begins when the first\n # allocation for the deployment is created and is reset whenever an allocation\n # as part of the deployment transitions to a healthy state. If no allocation\n # transitions to the healthy state before the progress deadline, the\n # deployment is marked as failed.\n progress_deadline = \"10m\"\n\n\n # The \"canary\" parameter specifies that changes to the job that would result\n # in destructive updates should create the specified number of canaries\n # without stopping any previous allocations. Once the operator determines the\n # canaries are healthy, they can be promoted which unblocks a rolling update\n # of the remaining allocations at a rate of \"max_parallel\".\n #\n # Further, setting \"canary\" equal to the count of the task group allows\n # blue/green deployments. When the job is updated, a full set of the new\n # version is deployed and upon promotion the old version is stopped.\n canary = 1\n\n # Specifies if the job should auto-promote to the canary version when all\n # canaries become healthy during a deployment. Defaults to false which means\n # canaries must be manually updated with the nomad deployment promote\n # command.\n auto_promote = true\n\n # The \"auto_revert\" parameter specifies if the job should auto-revert to the\n # last stable job on deployment failure. A job is marked as stable if all the\n # allocations as part of its deployment were marked healthy.\n auto_revert = true\n\n }\n\n # The reschedule stanza specifies the group's rescheduling strategy. If\n # specified at the job level, the configuration will apply to all groups\n # within the job. If the reschedule stanza is present on both the job and the\n # group, they are merged with the group stanza taking the highest precedence\n # and then the job.\n reschedule {\n delay = \"30s\"\n delay_function = \"constant\"\n unlimited = true\n }\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group\n #\n group \"prod-group1-prometheus\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 4\n\n # The restart stanza configures a tasks's behavior on task failure. Restarts\n # happen on the client that is running the task.\n #\n # https://www.nomadproject.io/docs/job-specification/restart\n #\n restart {\n interval = \"30m\"\n attempts = 40\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The volume stanza allows the group to specify that it requires a given\n # volume from the cluster.\n #\n # For more information and examples on the \"volume\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/volume\n #\n \n volume \"prod-volume1-prometheus\" {\n type = \"host\"\n read_only = false\n source = \"prod-volume-data1-1\"\n }\n \n\n # The constraint allows restricting the set of eligible nodes. Constraints\n # may filter on attributes or client metadata.\n #\n # For more information and examples on the \"volume\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/constraint\n #\n constraint {\n attribute = \"${attr.cpu.arch}\"\n operator = \"!=\"\n value = \"arm64\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task\n #\n task \"prod-task1-prometheus\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"exec\"\n\n \n volume_mount {\n volume = \"prod-volume1-prometheus\"\n destination = \"/data/\"\n read_only = false\n }\n \n\n \n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n command = \"local/prometheus-2.28.1.linux-amd64/prometheus\"\n args = [\n \"--config.file=secrets/prometheus.yml\",\n \"--storage.tsdb.path=/data/prometheus/\",\n \"--storage.tsdb.retention.time=7d\"\n ]\n }\n\n # The artifact stanza instructs Nomad to fetch and unpack a remote resource,\n # such as a file, tarball, or binary. Nomad downloads artifacts using the\n # popular go-getter library, which permits downloading artifacts from a\n # variety of locations using a URL as the input source.\n #\n # For more information and examples on the \"artifact\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/artifact\n #\n artifact {\n source = \"https://github.com/prometheus/prometheus/releases/download/v2.28.1/prometheus-2.28.1.linux-amd64.tar.gz\"\n }\n\n # The \"template\" stanza instructs Nomad to manage a template, such as\n # a configuration file or script. This template can optionally pull data\n # from Consul or Vault to populate runtime configuration data.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/template\n #\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/alerts.yml\"\n left_delimiter = \"{{{\"\n right_delimiter = \"}}}\"\n data = \u003c\u003cEOH\n---\ngroups:\n- name: \"Jenkins Job Health Exporter\"\n rules:\n - alert: JenkinsJobHealthExporterFailures\n expr: jenkins_job_failure{id=~\".*\"} \u003e jenkins_job_success{id=~\".*\"}\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Jenkins Job Health detected high failure rate on jenkins jobs.\"\n description: \"Job: {{ $labels.id }}\"\n - alert: JenkinsJobHealthExporterUnstable\n expr: jenkins_job_unstable{id=~\".*\"} \u003e jenkins_job_success{id=~\".*\"}\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Jenkins Job Health detected high unstable rate on jenkins jobs.\"\n description: \"Job: {{ $labels.id }}\"\n- name: \"Consul\"\n rules:\n - alert: ConsulServiceHealthcheckFailed\n expr: consul_catalog_service_node_healthy == 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Consul service healthcheck failed (instance {{ $labels.instance }}).\"\n description: \"Service: `{{ $labels.service_name }}` Healthcheck: `{{ $labels.service_id }}`.\"\n - alert: ConsulMissingMasterNode\n expr: consul_raft_peers \u003c 3\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Consul missing master node (instance {{ $labels.instance }}).\"\n description: \"Numbers of consul raft peers should be 3, in order to preserve quorum.\"\n - alert: ConsulAgentUnhealthy\n expr: consul_health_node_status{status=\"critical\"} == 1\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Consul agent unhealthy (instance {{ $labels.instance }}).\"\n description: \"A Consul agent is down.\"\n- name: \"Hosts\"\n rules:\n - alert: NodeDown\n expr: up == 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus target missing (instance {{ $labels.instance }}).\"\n description: \"A Prometheus target has disappeared. An exporter might be crashed.\"\n - alert: HostOutOfMemory\n expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 \u003c 10\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Host out of memory (instance {{ $labels.instance }}).\"\n description: \"Node memory is filling up (\u003c 10% left).\"\n - alert: HostOomKillDetected\n expr: increase(node_vmstat_oom_kill[1m]) \u003e 0\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Host OOM kill detected (instance {{ $labels.instance }}).\"\n description: \"OOM kill detected.\"\n - alert: HostMemoryUnderMemoryPressure\n expr: rate(node_vmstat_pgmajfault[1m]) \u003e 1000\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Host memory under memory pressure (instance {{ $labels.instance }}).\"\n description: \"The node is under heavy memory pressure. High rate of major page faults.\"\n - alert: HostOutOfDiskSpace\n expr: (node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes \u003c 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Host out of disk space (instance {{ $labels.instance }}).\"\n description: \"Disk is almost full (\u003c 10% left).\"\n - alert: HostRaidDiskFailure\n expr: node_md_disks{state=\"failed\"} \u003e 0\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Host RAID disk failure (instance {{ $labels.instance }}).\"\n description: \"At least one device in RAID array on {{ $labels.instance }} failed. Array {{ $labels.md_device }} needs attention and possibly a disk swap.\"\n - alert: HostConntrackLimit\n expr: node_nf_conntrack_entries / node_nf_conntrack_entries_limit \u003e 0.8\n for: 5m\n labels:\n severity: warning\n annotations:\n summary: \"Host conntrack limit (instance {{ $labels.instance }}).\"\n description: \"The number of conntrack is approching limit.\"\n - alert: HostNetworkInterfaceSaturated\n expr: (rate(node_network_receive_bytes_total{device!~\"^tap.*\"}[1m]) + rate(node_network_transmit_bytes_total{device!~\"^tap.*\"}[1m])) / node_network_speed_bytes{device!~\"^tap.*\"} \u003e 0.8\n for: 1m\n labels:\n severity: warning\n annotations:\n summary: \"Host Network Interface Saturated (instance {{ $labels.instance }}).\"\n description: \"The network interface {{ $labels.interface }} on {{ $labels.instance }} is getting overloaded.\"\n - alert: HostSystemdServiceCrashed\n expr: node_systemd_unit_state{state=\"failed\"} == 1\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Host SystemD service crashed (instance {{ $labels.instance }}).\"\n description: \"SystemD service crashed.\"\n - alert: HostEdacCorrectableErrorsDetected\n expr: increase(node_edac_correctable_errors_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: info\n annotations:\n summary: \"Host EDAC Correctable Errors detected (instance {{ $labels.instance }}).\"\n description: '{{ $labels.instance }} has had {{ printf \"%.0f\" $value }} correctable memory errors reported by EDAC in the last 5 minutes.'\n - alert: HostEdacUncorrectableErrorsDetected\n expr: node_edac_uncorrectable_errors_total \u003e 0\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Host EDAC Uncorrectable Errors detected (instance {{ $labels.instance }}).\"\n description: '{{ $labels.instance }} has had {{ printf \"%.0f\" $value }} uncorrectable memory errors reported by EDAC in the last 5 minutes.'\n- name: \"Min.io\"\n rules:\n - alert: MinioDiskOffline\n expr: minio_offline_disks \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Minio disk offline (instance {{ $labels.instance }})\"\n description: \"Minio disk is offline.\"\n - alert: MinioStorageSpaceExhausted\n expr: minio_disk_storage_free_bytes / 1024 / 1024 / 1024 \u003c 10\n for: 2m\n labels:\n severity: warning\n annotations:\n summary: \"Minio storage space exhausted (instance {{ $labels.instance }}).\"\n description: \"Minio storage space is low (\u003c 10 GB).\"\n- name: \"Prometheus\"\n rules:\n - alert: PrometheusConfigurationReloadFailure\n expr: prometheus_config_last_reload_successful != 1\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Prometheus configuration reload failure (instance {{ $labels.instance }}).\"\n description: \"Prometheus configuration reload error.\"\n - alert: PrometheusTooManyRestarts\n expr: changes(process_start_time_seconds{job=~\"prometheus|pushgateway|alertmanager\"}[15m]) \u003e 2\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Prometheus too many restarts (instance {{ $labels.instance }}).\"\n description: \"Prometheus has restarted more than twice in the last 15 minutes. It might be crashlooping.\"\n - alert: PrometheusAlertmanagerConfigurationReloadFailure\n expr: alertmanager_config_last_reload_successful != 1\n for: 0m\n labels:\n severity: warning\n annotations:\n summary: \"Prometheus AlertManager configuration reload failure (instance {{ $labels.instance }}).\"\n description: \"AlertManager configuration reload error.\"\n - alert: PrometheusRuleEvaluationFailures\n expr: increase(prometheus_rule_evaluation_failures_total[3m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus rule evaluation failures (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} rule evaluation failures, leading to potentially ignored alerts.\"\n - alert: PrometheusTargetScrapingSlow\n expr: prometheus_target_interval_length_seconds{quantile=\"0.9\"} \u003e 60\n for: 5m\n labels:\n severity: warning\n annotations:\n summary: \"Prometheus target scraping slow (instance {{ $labels.instance }}).\"\n description: \"Prometheus is scraping exporters slowly.\"\n - alert: PrometheusTsdbCompactionsFailed\n expr: increase(prometheus_tsdb_compactions_failed_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus TSDB compactions failed (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} TSDB compactions failures.\"\n - alert: PrometheusTsdbHeadTruncationsFailed\n expr: increase(prometheus_tsdb_head_truncations_failed_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus TSDB head truncations failed (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} TSDB head truncation failures.\"\n - alert: PrometheusTsdbWalCorruptions\n expr: increase(prometheus_tsdb_wal_corruptions_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus TSDB WAL corruptions (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} TSDB WAL corruptions.\"\n - alert: PrometheusTsdbWalTruncationsFailed\n expr: increase(prometheus_tsdb_wal_truncations_failed_total[1m]) \u003e 0\n for: 0m\n labels:\n severity: critical\n annotations:\n summary: \"Prometheus TSDB WAL truncations failed (instance {{ $labels.instance }}).\"\n description: \"Prometheus encountered {{ $value }} TSDB WAL truncation failures.\"\nEOH\n }\n\n template {\n change_mode = \"noop\"\n change_signal = \"SIGINT\"\n destination = \"secrets/prometheus.yml\"\n data = \u003c\u003cEOH\n---\nglobal:\n scrape_interval: 5s\n scrape_timeout: 5s\n evaluation_interval: 5s\n\nalerting:\n alertmanagers:\n - consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'alertmanager' ]\n\nrule_files:\n - 'alerts.yml'\n\nscrape_configs:\n\n - job_name: 'Nomad Cluster'\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'nomad-client', 'nomad' ]\n relabel_configs:\n - source_labels: [__meta_consul_tags]\n regex: '(.*)http(.*)'\n action: keep\n metrics_path: /v1/metrics\n params:\n format: [ 'prometheus' ]\n\n - job_name: 'Consul Cluster'\n static_configs:\n - targets: [ '10.30.51.22:8500' ]\n - targets: [ '10.30.51.24:8500' ]\n - targets: [ '10.30.51.25:8500' ]\n - targets: [ '10.30.51.26:8500' ]\n - targets: [ '10.30.51.28:8500' ]\n - targets: [ '10.30.51.29:8500' ]\n - targets: [ '10.30.51.30:8500' ]\n - targets: [ '10.30.51.39:8500' ]\n - targets: [ '10.30.51.40:8500' ]\n - targets: [ '10.30.51.50:8500' ]\n - targets: [ '10.30.51.51:8500' ]\n - targets: [ '10.30.51.65:8500' ]\n - targets: [ '10.30.51.66:8500' ]\n - targets: [ '10.30.51.67:8500' ]\n - targets: [ '10.30.51.68:8500' ]\n - targets: [ '10.30.51.70:8500' ]\n - targets: [ '10.30.51.71:8500' ]\n - targets: [ '10.32.8.14:8500' ]\n - targets: [ '10.32.8.15:8500' ]\n - targets: [ '10.32.8.16:8500' ]\n - targets: [ '10.32.8.17:8500' ]\n metrics_path: /v1/agent/metrics\n params:\n format: [ 'prometheus' ]\n\n - job_name: 'Blackbox Exporter (icmp)'\n static_configs:\n - targets: [ 'gerrit.fd.io' ]\n - targets: [ 'jenkins.fd.io' ]\n - targets: [ '10.32.8.17' ]\n params:\n module: [ 'icmp_v4' ]\n relabel_configs:\n - source_labels: [__address__]\n target_label: __param_target\n - source_labels: [__param_target]\n target_label: instance\n - target_label: __address__\n replacement: localhost:9115\n metrics_path: /probe\n\n - job_name: 'Blackbox Exporter (http)'\n static_configs:\n - targets: [ 'gerrit.fd.io' ]\n - targets: [ 'jenkins.fd.io' ]\n params:\n module: [ 'http_2xx' ]\n relabel_configs:\n - source_labels: [__address__]\n target_label: __param_target\n - source_labels: [__param_target]\n target_label: instance\n - target_label: __address__\n replacement: localhost:9115\n metrics_path: /probe\n\n - job_name: 'Jenkins Job Health Exporter'\n static_configs:\n - targets: [ '10.30.51.22:9186' ]\n metric_relabel_configs:\n - source_labels: [ __name__ ]\n regex: '^(vpp.*|csit.*)_(success|failure|total|unstable|reqtime_ms)$'\n action: replace\n replacement: '$1'\n target_label: id\n - source_labels: [ __name__ ]\n regex: '^(vpp.*|csit.*)_(success|failure|total|unstable|reqtime_ms)$'\n replacement: 'jenkins_job_$2'\n target_label: __name__\n\n - job_name: 'Node Exporter'\n static_configs:\n - targets: [ '10.30.51.22:9100' ]\n - targets: [ '10.30.51.24:9100' ]\n - targets: [ '10.30.51.25:9100' ]\n - targets: [ '10.30.51.26:9100' ]\n - targets: [ '10.30.51.28:9100' ]\n - targets: [ '10.30.51.29:9100' ]\n - targets: [ '10.30.51.30:9100' ]\n - targets: [ '10.30.51.39:9100' ]\n - targets: [ '10.30.51.40:9100' ]\n - targets: [ '10.30.51.50:9100' ]\n - targets: [ '10.30.51.51:9100' ]\n - targets: [ '10.30.51.65:9100' ]\n - targets: [ '10.30.51.66:9100' ]\n - targets: [ '10.30.51.67:9100' ]\n - targets: [ '10.30.51.68:9100' ]\n - targets: [ '10.30.51.70:9100' ]\n - targets: [ '10.30.51.71:9100' ]\n - targets: [ '10.32.8.14:9100' ]\n - targets: [ '10.32.8.15:9100' ]\n - targets: [ '10.32.8.16:9100' ]\n - targets: [ '10.32.8.17:9100' ]\n\n - job_name: 'Alertmanager'\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'alertmanager' ]\n\n - job_name: 'Grafana'\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'grafana' ]\n\n - job_name: 'Prometheus'\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'prometheus' ]\n\n - job_name: 'Minio'\n bearer_token: eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJleHAiOjQ3NjQ1ODEzMzcsImlzcyI6InByb21ldGhldXMiLCJzdWIiOiJtaW5pbyJ9.oeTw3EIaiFmlDikrHXWiWXMH2vxLfDLkfjEC7G2N3M_keH_xyA_l2ofLLNYtopa_3GCEZnxLQdPuFZrmgpkDWg\n consul_sd_configs:\n - server: '{{ env \"NOMAD_IP_prometheus\" }}:8500'\n services: [ 'storage' ]\n metrics_path: /minio/prometheus/metrics\nEOH\n }\n\n # The service stanza instructs Nomad to register a service with Consul.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/service\n #\n service {\n name = \"prometheus\"\n port = \"prometheus\"\n tags = [ \"prometheus${NOMAD_ALLOC_INDEX}\" ]\n check {\n name = \"Prometheus Check Live\"\n type = \"http\"\n path = \"/-/healthy\"\n interval = \"10s\"\n timeout = \"2s\"\n }\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources\n #\n resources {\n cpu = 2000\n memory = 8192\n # The network stanza specifies the networking requirements for the task\n # group, including the network mode and port allocations. When scheduling\n # jobs in Nomad they are provisioned across your fleet of machines along\n # with other jobs and services. Because you don't know in advance what host\n # your job will be provisioned on, Nomad will provide your tasks with\n # network configuration when they start up.\n #\n # For more information and examples on the \"template\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/network\n #\n network {\n port \"prometheus\" {\n static = 9090\n }\n }\n }\n }\n }\n}",
- "json": null,
- "modify_index": "8995654",
- "name": "prod-prometheus",
- "namespace": "default",
- "policy_override": null,
- "purge_on_destroy": null,
- "region": "global",
- "task_groups": [
- {
- "count": 4,
- "meta": {},
- "name": "prod-group1-prometheus",
- "task": [
- {
- "driver": "exec",
- "meta": {},
- "name": "prod-task1-prometheus",
- "volume_mounts": [
- {
- "destination": "/data/",
- "read_only": false,
- "volume": "prod-volume1-prometheus"
- }
- ]
- }
- ],
- "volumes": [
- {
- "name": "prod-volume1-prometheus",
- "read_only": false,
- "source": "prod-volume-data1-1",
- "type": "host"
- }
- ]
- }
- ],
- "type": "service"
- },
- "sensitive_attributes": [],
- "private": "bnVsbA==",
- "dependencies": [
- "module.prometheus.data.template_file.nomad_job_prometheus"
- ]
- }
- ]
- },
- {
- "module": "module.vpp_device",
- "mode": "data",
- "type": "template_file",
- "name": "nomad_job_csit_shim",
- "provider": "provider[\"registry.terraform.io/hashicorp/template\"]",
- "instances": [
- {
- "schema_version": 0,
- "attributes": {
- "filename": null,
- "id": "bebf27b2f8eb532adad3cabb86953deed4d0e9970ab0524d7857b3ebeebb917d",
- "rendered": "job \"prod-device-csit-shim\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"yul1\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers.html\n #\n type = \"system\"\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group.html\n #\n group \"prod-group1-csit-shim-amd\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 1\n\n constraint {\n attribute = \"${node.class}\"\n value = \"csit\"\n }\n\n restart {\n interval = \"1m\"\n attempts = 3\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task.html\n #\n task \"prod-task1-csit-shim-amd\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"fdiotools/csit_shim-ubuntu2004:2021_03_04_142103_UTC-x86_64\"\n network_mode = \"host\"\n pid_mode = \"host\"\n volumes = [\n \"/var/run/docker.sock:/var/run/docker.sock\"\n ]\n privileged = true\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources.html\n #\n resources {\n cpu = 1500\n memory = 4096\n network {\n port \"ssh\" {\n static = 6022\n }\n port \"ssh2\" {\n static = 6023\n }\n }\n }\n }\n }\n\n group \"prod-group1-csit-shim-arm\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 1\n\n constraint {\n attribute = \"${node.class}\"\n value = \"csitarm\"\n }\n\n restart {\n interval = \"1m\"\n attempts = 3\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task.html\n #\n task \"prod-task1-csit-shim-arm\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"fdiotools/csit_shim-ubuntu2004:2021_03_02_143938_UTC-aarch64\"\n network_mode = \"host\"\n pid_mode = \"host\"\n volumes = [\n \"/var/run/docker.sock:/var/run/docker.sock\"\n ]\n privileged = true\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources.html\n #\n resources {\n cpu = 1500\n memory = 4096\n network {\n port \"ssh\" {\n static = 6022\n }\n port \"ssh2\" {\n static = 6023\n }\n }\n }\n }\n }\n}",
- "template": "job \"${job_name}\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"${datacenters}\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers.html\n #\n type = \"system\"\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group.html\n #\n group \"prod-group1-csit-shim-amd\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = ${group_count}\n\n constraint {\n attribute = \"$${node.class}\"\n value = \"csit\"\n }\n\n restart {\n interval = \"1m\"\n attempts = 3\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task.html\n #\n task \"prod-task1-csit-shim-amd\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"${image_x86_64}\"\n network_mode = \"host\"\n pid_mode = \"host\"\n volumes = [\n \"/var/run/docker.sock:/var/run/docker.sock\"\n ]\n privileged = true\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources.html\n #\n resources {\n cpu = ${cpu}\n memory = ${mem}\n network {\n port \"ssh\" {\n static = 6022\n }\n port \"ssh2\" {\n static = 6023\n }\n }\n }\n }\n }\n\n group \"prod-group1-csit-shim-arm\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = ${group_count}\n\n constraint {\n attribute = \"$${node.class}\"\n value = \"csitarm\"\n }\n\n restart {\n interval = \"1m\"\n attempts = 3\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task.html\n #\n task \"prod-task1-csit-shim-arm\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"${image_aarch64}\"\n network_mode = \"host\"\n pid_mode = \"host\"\n volumes = [\n \"/var/run/docker.sock:/var/run/docker.sock\"\n ]\n privileged = true\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources.html\n #\n resources {\n cpu = ${cpu}\n memory = ${mem}\n network {\n port \"ssh\" {\n static = 6022\n }\n port \"ssh2\" {\n static = 6023\n }\n }\n }\n }\n }\n}",
- "vars": {
- "cpu": "1500",
- "datacenters": "yul1",
- "group_count": "1",
- "image_aarch64": "fdiotools/csit_shim-ubuntu2004:2021_03_02_143938_UTC-aarch64",
- "image_x86_64": "fdiotools/csit_shim-ubuntu2004:2021_03_04_142103_UTC-x86_64",
- "job_name": "prod-device-csit-shim",
- "mem": "4096"
- }
- },
- "sensitive_attributes": []
- }
- ]
- },
- {
- "module": "module.vpp_device",
- "mode": "managed",
- "type": "nomad_job",
- "name": "nomad_job_csit_shim",
- "provider": "provider[\"registry.terraform.io/hashicorp/nomad\"].yul1",
- "instances": [
- {
- "schema_version": 0,
- "attributes": {
- "allocation_ids": [
- "0c5c6e61-f8e5-bc82-0de9-5ac202d3600c",
- "f9058e31-91d4-4cde-cef8-25ce59f2f4b6",
- "d89ab9c0-4c2c-8430-f7ea-b609fc8285ac",
- "055428eb-e662-5e07-14f1-afc32a2a491f"
- ],
- "datacenters": [
- "yul1"
- ],
- "deployment_id": "",
- "deployment_status": "",
- "deregister_on_destroy": true,
- "deregister_on_id_change": true,
- "detach": false,
- "id": "prod-device-csit-shim",
- "jobspec": "job \"prod-device-csit-shim\" {\n # The \"region\" parameter specifies the region in which to execute the job.\n # If omitted, this inherits the default region name of \"global\".\n # region = \"global\"\n #\n # The \"datacenters\" parameter specifies the list of datacenters which should\n # be considered when placing this task. This must be provided.\n datacenters = \"yul1\"\n\n # The \"type\" parameter controls the type of job, which impacts the scheduler's\n # decision on placement. This configuration is optional and defaults to\n # \"service\". For a full list of job types and their differences, please see\n # the online documentation.\n #\n # For more information, please see the online documentation at:\n #\n # https://www.nomadproject.io/docs/jobspec/schedulers.html\n #\n type = \"system\"\n\n # The \"group\" stanza defines a series of tasks that should be co-located on\n # the same Nomad client. Any task within a group will be placed on the same\n # client.\n #\n # For more information and examples on the \"group\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/group.html\n #\n group \"prod-group1-csit-shim-amd\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 1\n\n constraint {\n attribute = \"${node.class}\"\n value = \"csit\"\n }\n\n restart {\n interval = \"1m\"\n attempts = 3\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task.html\n #\n task \"prod-task1-csit-shim-amd\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"fdiotools/csit_shim-ubuntu2004:2021_03_04_142103_UTC-x86_64\"\n network_mode = \"host\"\n pid_mode = \"host\"\n volumes = [\n \"/var/run/docker.sock:/var/run/docker.sock\"\n ]\n privileged = true\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources.html\n #\n resources {\n cpu = 1500\n memory = 4096\n network {\n port \"ssh\" {\n static = 6022\n }\n port \"ssh2\" {\n static = 6023\n }\n }\n }\n }\n }\n\n group \"prod-group1-csit-shim-arm\" {\n # The \"count\" parameter specifies the number of the task groups that should\n # be running under this group. This value must be non-negative and defaults\n # to 1.\n count = 1\n\n constraint {\n attribute = \"${node.class}\"\n value = \"csitarm\"\n }\n\n restart {\n interval = \"1m\"\n attempts = 3\n delay = \"15s\"\n mode = \"delay\"\n }\n\n # The \"task\" stanza creates an individual unit of work, such as a Docker\n # container, web application, or batch processing.\n #\n # For more information and examples on the \"task\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/task.html\n #\n task \"prod-task1-csit-shim-arm\" {\n # The \"driver\" parameter specifies the task driver that should be used to\n # run the task.\n driver = \"docker\"\n\n # The \"config\" stanza specifies the driver configuration, which is passed\n # directly to the driver to start the task. The details of configurations\n # are specific to each driver, so please see specific driver\n # documentation for more information.\n config {\n image = \"fdiotools/csit_shim-ubuntu2004:2021_03_02_143938_UTC-aarch64\"\n network_mode = \"host\"\n pid_mode = \"host\"\n volumes = [\n \"/var/run/docker.sock:/var/run/docker.sock\"\n ]\n privileged = true\n }\n\n # The \"resources\" stanza describes the requirements a task needs to\n # execute. Resource requirements include memory, network, cpu, and more.\n # This ensures the task will execute on a machine that contains enough\n # resource capacity.\n #\n # For more information and examples on the \"resources\" stanza, please see\n # the online documentation at:\n #\n # https://www.nomadproject.io/docs/job-specification/resources.html\n #\n resources {\n cpu = 1500\n memory = 4096\n network {\n port \"ssh\" {\n static = 6022\n }\n port \"ssh2\" {\n static = 6023\n }\n }\n }\n }\n }\n}",
- "json": null,
- "modify_index": "7575030",
- "name": "prod-device-csit-shim",
- "namespace": "default",
- "policy_override": null,
- "purge_on_destroy": null,
- "region": "global",
- "task_groups": [
- {
- "count": 1,
- "meta": {},
- "name": "prod-group1-csit-shim-amd",
- "task": [
- {
- "driver": "docker",
- "meta": {},
- "name": "prod-task1-csit-shim-amd",
- "volume_mounts": []
- }
- ],
- "volumes": []
- },
- {
- "count": 1,
- "meta": {},
- "name": "prod-group1-csit-shim-arm",
- "task": [
- {
- "driver": "docker",
- "meta": {},
- "name": "prod-task1-csit-shim-arm",
- "volume_mounts": []
- }
- ],
- "volumes": []
- }
- ],
- "type": "system"
- },
- "sensitive_attributes": [],
- "private": "bnVsbA==",
- "dependencies": [
- "module.vpp_device.data.template_file.nomad_job_csit_shim"
- ]
- }
- ]
- }
- ]
-}
diff --git a/fdio.infra.terraform/1n_nmd/variables.tf b/fdio.infra.terraform/1n_nmd/variables.tf
index a575d0b51d..598770eb13 100644
--- a/fdio.infra.terraform/1n_nmd/variables.tf
+++ b/fdio.infra.terraform/1n_nmd/variables.tf
@@ -1,11 +1,35 @@
+variable "nomad_acl" {
+ description = "Nomad ACLs enabled/disabled"
+ type = bool
+ default = false
+}
+
variable "nomad_provider_address" {
description = "FD.io Nomad cluster address."
type = string
default = "http://nomad.service.consul:4646"
}
-variable "nomad_acl" {
- description = "Nomad ACLs enabled/disabled"
- type = bool
- default = false
+variable "nomad_provider_ca_file" {
+ description = "A local file path to a PEM-encoded certificate authority."
+ type = string
+ default = "/etc/nomad.d/ssl/nomad-ca.pem"
+}
+
+variable "nomad_provider_cert_file" {
+ description = "A local file path to a PEM-encoded certificate."
+ type = string
+ default = "/etc/nomad.d/ssl/nomad-cli.pem"
+}
+
+variable "nomad_provider_key_file" {
+ description = "A local file path to a PEM-encoded private key."
+ type = string
+ default = "/etc/nomad.d/ssl/nomad-cli-key.pem"
+}
+
+variable "token" {
+ description = "Vault root token"
+ type = string
+ sensitive = true
} \ No newline at end of file
diff --git a/fdio.infra.terraform/1n_nmd/versions.tf b/fdio.infra.terraform/1n_nmd/versions.tf
new file mode 100644
index 0000000000..556ddbaee4
--- /dev/null
+++ b/fdio.infra.terraform/1n_nmd/versions.tf
@@ -0,0 +1,21 @@
+terraform {
+ backend "consul" {
+ address = "consul.service.consul:8500"
+ scheme = "http"
+ path = "terraform/nomad"
+ }
+ required_providers {
+ nomad = {
+ source = "hashicorp/nomad"
+ version = "~> 1.4.9"
+ }
+ template = {
+ source = "hashicorp/template"
+ version = "~> 2.2.0"
+ }
+ vault = {
+ version = ">=2.14.0"
+ }
+ }
+ required_version = ">= 1.0.3"
+} \ No newline at end of file
diff --git a/fdio.infra.terraform/1n_nmd/vpp_device/versions.tf b/fdio.infra.terraform/1n_nmd/vpp_device/versions.tf
new file mode 100644
index 0000000000..b80610a525
--- /dev/null
+++ b/fdio.infra.terraform/1n_nmd/vpp_device/versions.tf
@@ -0,0 +1,13 @@
+terraform {
+ required_providers {
+ nomad = {
+ source = "hashicorp/nomad"
+ version = "~> 1.4.15"
+ }
+ template = {
+ source = "hashicorp/template"
+ version = "~> 2.2.0"
+ }
+ }
+ required_version = ">= 1.0.3"
+}
diff --git a/resources/tools/storage/__main__.py b/resources/tools/storage/__main__.py
index e8452dc9d1..d17c9e2292 100644
--- a/resources/tools/storage/__main__.py
+++ b/resources/tools/storage/__main__.py
@@ -39,11 +39,11 @@ def main():
args = parser.parse_args()
json_iterator = Storage(
- endpoint_url=u"http://storage.service.consul:9000",
- bucket=u"docs",
- profile_name=u"nomad-s3"
+ endpoint_url=u"http://minio.service.consul:9001",
+ bucket=u"fdio-logs-s3-cloudfront-index",
+ profile_name=u"default"
).s3_file_processing(
- prefix=u"", expression=args.expression
+ prefix=u"vex-yul-rot-jenkins-1/csit-vpp-perf", expression=args.expression
)
for item in json_iterator:
print(dumps(item, indent=4, sort_keys=False))