Diffstat (limited to 'fdio.infra.terraform/1n_nmd')
-rw-r--r--  fdio.infra.terraform/1n_nmd/alertmanager/conf/nomad/alertmanager.hcl.tftpl | 377
-rw-r--r--  fdio.infra.terraform/1n_nmd/alertmanager/fdio/main.tf | 14
-rw-r--r--  fdio.infra.terraform/1n_nmd/alertmanager/fdio/providers.tf | 13
-rw-r--r--  fdio.infra.terraform/1n_nmd/alertmanager/fdio/variables.tf | 47
-rw-r--r--  fdio.infra.terraform/1n_nmd/alertmanager/fdio/versions.tf | 17
-rw-r--r--  fdio.infra.terraform/1n_nmd/alertmanager/main.tf | 48
-rw-r--r--  fdio.infra.terraform/1n_nmd/alertmanager/variables.tf | 157
-rw-r--r--  fdio.infra.terraform/1n_nmd/alertmanager/versions.tf | 9
-rw-r--r--  fdio.infra.terraform/1n_nmd/etl/conf/nomad/etl.hcl.tftpl | 318
-rw-r--r--  fdio.infra.terraform/1n_nmd/etl/fdio/main.tf | 23
-rw-r--r--  fdio.infra.terraform/1n_nmd/etl/fdio/providers.tf | 13
-rw-r--r--  fdio.infra.terraform/1n_nmd/etl/fdio/variables.tf | 47
-rw-r--r--  fdio.infra.terraform/1n_nmd/etl/fdio/versions.tf | 17
-rw-r--r--  fdio.infra.terraform/1n_nmd/etl/main.tf | 33
-rw-r--r--  fdio.infra.terraform/1n_nmd/etl/variables.tf | 115
-rw-r--r--  fdio.infra.terraform/1n_nmd/etl/versions.tf | 9
-rw-r--r--  fdio.infra.terraform/1n_nmd/main.tf | 89
-rw-r--r--  fdio.infra.terraform/1n_nmd/minio_s3_gateway/conf/nomad/minio.hcl | 246
-rw-r--r--  fdio.infra.terraform/1n_nmd/minio_s3_gateway/main.tf | 51
-rw-r--r--  fdio.infra.terraform/1n_nmd/minio_s3_gateway/variables.tf | 199
-rw-r--r--  fdio.infra.terraform/1n_nmd/minio_s3_gateway/versions.tf | 13
-rw-r--r--  fdio.infra.terraform/1n_nmd/prometheus/conf/nomad/prometheus.hcl.tftpl | 624
-rw-r--r--  fdio.infra.terraform/1n_nmd/prometheus/fdio/main.tf | 10
-rw-r--r--  fdio.infra.terraform/1n_nmd/prometheus/fdio/providers.tf | 13
-rw-r--r--  fdio.infra.terraform/1n_nmd/prometheus/fdio/variables.tf | 47
-rw-r--r--  fdio.infra.terraform/1n_nmd/prometheus/fdio/versions.tf | 17
-rw-r--r--  fdio.infra.terraform/1n_nmd/prometheus/main.tf | 42
-rw-r--r--  fdio.infra.terraform/1n_nmd/prometheus/variables.tf | 127
-rw-r--r--  fdio.infra.terraform/1n_nmd/prometheus/versions.tf | 9
-rw-r--r--  fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/fdio/main.tf | 17
-rw-r--r--  fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/fdio/providers.tf | 5
-rw-r--r--  fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/fdio/variables.tf | 17
-rw-r--r--  fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/fdio/versions.tf | 13
-rw-r--r--  fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/main.tf | 37
-rw-r--r--  fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/variables.tf | 17
-rw-r--r--  fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/versions.tf | 8
36 files changed, 0 insertions(+), 2858 deletions(-)
diff --git a/fdio.infra.terraform/1n_nmd/alertmanager/conf/nomad/alertmanager.hcl.tftpl b/fdio.infra.terraform/1n_nmd/alertmanager/conf/nomad/alertmanager.hcl.tftpl
deleted file mode 100644
index 87206ac5a0..0000000000
--- a/fdio.infra.terraform/1n_nmd/alertmanager/conf/nomad/alertmanager.hcl.tftpl
+++ /dev/null
@@ -1,377 +0,0 @@
-job "${job_name}" {
- # The "region" parameter specifies the region in which to execute the job.
- # If omitted, this inherits the default region name of "global".
- # region = "${region}"
-
- # The "datacenters" parameter specifies the list of datacenters which should
- # be considered when placing this task. This must be provided.
- datacenters = "${datacenters}"
-
- # The "type" parameter controls the type of job, which impacts the scheduler's
- # decision on placement. This configuration is optional and defaults to
- # "service". For a full list of job types and their differences, please see
- # the online documentation.
- #
- # https://www.nomadproject.io/docs/jobspec/schedulers
- #
- type = "service"
-
- update {
- # The "max_parallel" parameter specifies the maximum number of updates to
- # perform in parallel. In this case, this specifies to update a single task
- # at a time.
- max_parallel = ${max_parallel}
-
- health_check = "checks"
-
- # The "min_healthy_time" parameter specifies the minimum time the allocation
- # must be in the healthy state before it is marked as healthy and unblocks
- # further allocations from being updated.
- min_healthy_time = "10s"
-
- # The "healthy_deadline" parameter specifies the deadline in which the
- # allocation must be marked as healthy after which the allocation is
- # automatically transitioned to unhealthy. Transitioning to unhealthy will
- # fail the deployment and potentially roll back the job if "auto_revert" is
- # set to true.
- healthy_deadline = "3m"
-
- # The "progress_deadline" parameter specifies the deadline in which an
- # allocation must be marked as healthy. The deadline begins when the first
- # allocation for the deployment is created and is reset whenever an allocation
- # as part of the deployment transitions to a healthy state. If no allocation
- # transitions to the healthy state before the progress deadline, the
- # deployment is marked as failed.
- progress_deadline = "10m"
-
-%{ if use_canary }
- # The "canary" parameter specifies that changes to the job that would result
- # in destructive updates should create the specified number of canaries
- # without stopping any previous allocations. Once the operator determines the
- # canaries are healthy, they can be promoted which unblocks a rolling update
- # of the remaining allocations at a rate of "max_parallel".
- #
- # Further, setting "canary" equal to the count of the task group allows
- # blue/green deployments. When the job is updated, a full set of the new
- # version is deployed and upon promotion the old version is stopped.
- canary = ${canary}
-
- # Specifies if the job should auto-promote to the canary version when all
- # canaries become healthy during a deployment. Defaults to false which means
- # canaries must be manually updated with the nomad deployment promote
- # command.
- auto_promote = ${auto_promote}
-
- # The "auto_revert" parameter specifies if the job should auto-revert to the
- # last stable job on deployment failure. A job is marked as stable if all the
- # allocations as part of its deployment were marked healthy.
- auto_revert = ${auto_revert}
-%{ endif }
- }
-
- # All groups in this job should be scheduled on different hosts.
- constraint {
- operator = "distinct_hosts"
- value = "true"
- }
-
- # The "group" stanza defines a series of tasks that should be co-located on
- # the same Nomad client. Any task within a group will be placed on the same
- # client.
- #
- # https://www.nomadproject.io/docs/job-specification/group
- #
- group "${job_name}-group-1" {
- # The "count" parameter specifies the number of the task groups that should
- # be running under this group. This value must be non-negative and defaults
- # to 1.
- count = ${group_count}
-
- # The volume stanza allows the group to specify that it requires a given
- # volume from the cluster. The key of the stanza is the name of the volume
- # as it will be exposed to task configuration.
- #
- # https://www.nomadproject.io/docs/job-specification/volume
- %{ if use_host_volume }
- volume "${job_name}-volume-1" {
- type = "host"
- read_only = false
- source = "${volume_source}"
- }
- %{ endif }
-
- # The restart stanza configures a tasks's behavior on task failure. Restarts
- # happen on the client that is running the task.
- #
- # https://www.nomadproject.io/docs/job-specification/restart
- #
- restart {
- interval = "30m"
- attempts = 40
- delay = "15s"
- mode = "delay"
- }
-
- # The constraint allows restricting the set of eligible nodes. Constraints
- # may filter on attributes or client metadata.
- #
- # https://www.nomadproject.io/docs/job-specification/constraint
- #
- constraint {
- attribute = "$${attr.cpu.arch}"
- operator = "!="
- value = "arm64"
- }
-
- constraint {
- attribute = "$${node.class}"
- value = "builder"
- }
-
- # The network stanza specifies the networking requirements for the task
- # group, including the network mode and port allocations. When scheduling
- # jobs in Nomad they are provisioned across your fleet of machines along
- # with other jobs and services. Because you don't know in advance what host
- # your job will be provisioned on, Nomad will provide your tasks with
- # network configuration when they start up.
- #
- # https://www.nomadproject.io/docs/job-specification/network
- #
- network {
- port "${service_name}" {
- static = ${port}
- to = ${port}
- }
- }
-
- # The "task" stanza creates an individual unit of work, such as a Docker
- # container, web application, or batch processing.
- #
- # https://www.nomadproject.io/docs/job-specification/task
- #
- task "${job_name}-task-1" {
- # The "driver" parameter specifies the task driver that should be used to
- # run the task.
- driver = "exec"
-
- %{ if use_host_volume }
- volume_mount {
- volume = "${job_name}-volume-1"
- destination = "${volume_destination}"
- read_only = false
- }
- %{ endif }
-
- %{ if use_vault_provider }
- vault {
- policies = "${vault_kv_policy_name}"
- }
- %{ endif }
-
- # The "config" stanza specifies the driver configuration, which is passed
- # directly to the driver to start the task. The details of configurations
- # are specific to each driver, so please see specific driver
- # documentation for more information.
- config {
- command = "local/alertmanager-${version}.linux-amd64/alertmanager"
- args = [
- "--config.file=secrets/alertmanager.yml"
- ]
- }
-
- # The artifact stanza instructs Nomad to fetch and unpack a remote resource,
- # such as a file, tarball, or binary. Nomad downloads artifacts using the
- # popular go-getter library, which permits downloading artifacts from a
- # variety of locations using a URL as the input source.
- #
- # https://www.nomadproject.io/docs/job-specification/artifact
- #
- artifact {
- source = "${url}"
- }
-
- # The "template" stanza instructs Nomad to manage a template, such as
- # a configuration file or script. This template can optionally pull data
- # from Consul or Vault to populate runtime configuration data.
- #
- # https://www.nomadproject.io/docs/job-specification/template
- #
- template {
- change_mode = "noop"
- change_signal = "SIGINT"
- destination = "secrets/alertmanager.yml"
- left_delimiter = "{{{"
- right_delimiter = "}}}"
- data = <<EOH
-# The directory from which notification templates are read.
-templates:
-- '/etc/alertmanager/template/*.tmpl'
-
-#tls_config:
-# # CA certificate to validate the server certificate with.
-# ca_file: <filepath> ]
-#
-# # Certificate and key files for client cert authentication to the server.
-# cert_file: <filepath>
-# key_file: <filepath>
-#
-# # ServerName extension to indicate the name of the server.
-# # http://tools.ietf.org/html/rfc4366#section-3.1
-# server_name: <string>
-#
-# # Disable validation of the server certificate.
-# insecure_skip_verify: true
-
-# The root route on which each incoming alert enters.
-route:
- receiver: '${slack_default_receiver}'
-
- # The labels by which incoming alerts are grouped together. For example,
- # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
- # be batched into a single group.
- #
- # To aggregate by all possible labels use '...' as the sole label name.
- # This effectively disables aggregation entirely, passing through all
- # alerts as-is. This is unlikely to be what you want, unless you have
- # a very low alert volume or your upstream notification system performs
- # its own grouping. Example: group_by: [...]
- group_by: ['alertname']
-
- # When a new group of alerts is created by an incoming alert, wait at
- # least 'group_wait' to send the initial notification.
- # This way ensures that you get multiple alerts for the same group that start
- # firing shortly after another are batched together on the first
- # notification.
- group_wait: 30s
-
- # When the first notification was sent, wait 'group_interval' to send a batch
- # of new alerts that started firing for that group.
- group_interval: 5m
-
- # If an alert has successfully been sent, wait 'repeat_interval' to
- # resend them.
- repeat_interval: 3h
-
- # All the above attributes are inherited by all child routes and can
- # overwritten on each.
- # The child route trees.
- routes:
- - match_re:
- alertname: JenkinsJob.*
- receiver: ${slack_jenkins_receiver}
- routes:
- - match:
- severity: critical
- receiver: '${slack_jenkins_receiver}'
-
- - match_re:
- service: .*
- receiver: ${slack_default_receiver}
- routes:
- - match:
- severity: critical
- receiver: '${slack_default_receiver}'
-
-# Inhibition rules allow to mute a set of alerts given that another alert is
-# firing.
-# We use this to mute any warning-level notifications if the same alert is
-# already critical.
-inhibit_rules:
-- source_match:
- severity: 'critical'
- target_match:
- severity: 'warning'
- equal: ['alertname', 'instance']
-
-receivers:
-- name: '${slack_jenkins_receiver}'
- slack_configs:
- - api_url: 'https://hooks.slack.com/services/${slack_jenkins_api_key}'
- channel: '#${slack_jenkins_channel}'
- send_resolved: true
- icon_url: https://avatars3.githubusercontent.com/u/3380462
- title: |-
- [{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .CommonLabels.alertname }} for {{ .CommonLabels.job }}
- {{- if gt (len .CommonLabels) (len .GroupLabels) -}}
- {{" "}}(
- {{- with .CommonLabels.Remove .GroupLabels.Names }}
- {{- range $index, $label := .SortedPairs -}}
- {{ if $index }}, {{ end }}
- {{- $label.Name }}="{{ $label.Value -}}"
- {{- end }}
- {{- end -}}
- )
- {{- end }}
- text: >-
- {{ range .Alerts -}}
- *Alert:* {{ .Annotations.summary }}{{ if .Labels.severity }} - `{{ .Labels.severity }}`{{ end }}
-
- *Description:* {{ .Annotations.description }}
-
- *Details:*
- {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`
- {{ end }}
- {{ end }}
-
-- name: '${slack_default_receiver}'
- slack_configs:
- - api_url: 'https://hooks.slack.com/services/${slack_default_api_key}'
- channel: '#${slack_default_channel}'
- send_resolved: true
- icon_url: https://avatars3.githubusercontent.com/u/3380462
- title: |-
- [{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .CommonLabels.alertname }} for {{ .CommonLabels.job }}
- {{- if gt (len .CommonLabels) (len .GroupLabels) -}}
- {{" "}}(
- {{- with .CommonLabels.Remove .GroupLabels.Names }}
- {{- range $index, $label := .SortedPairs -}}
- {{ if $index }}, {{ end }}
- {{- $label.Name }}="{{ $label.Value -}}"
- {{- end }}
- {{- end -}}
- )
- {{- end }}
- text: >-
- {{ range .Alerts -}}
- *Alert:* {{ .Annotations.summary }}{{ if .Labels.severity }} - `{{ .Labels.severity }}`{{ end }}
-
- *Description:* {{ .Annotations.description }}
-
- *Details:*
- {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`
- {{ end }}
- {{ end }}
-EOH
- }
-
- # The service stanza instructs Nomad to register a service with Consul.
- #
- # https://www.nomadproject.io/docs/job-specification/service
- #
- service {
- name = "${service_name}"
- port = "${service_name}"
- tags = [ "${service_name}$${NOMAD_ALLOC_INDEX}" ]
- check {
- name = "Alertmanager Check Live"
- type = "http"
- path = "/-/healthy"
- interval = "10s"
- timeout = "2s"
- }
- }
-
- # The "resources" stanza describes the requirements a task needs to
- # execute. Resource requirements include memory, network, cpu, and more.
- # This ensures the task will execute on a machine that contains enough
- # resource capacity.
- #
- # https://www.nomadproject.io/docs/job-specification/resources
- #
- resources {
- cpu = ${cpu}
- memory = ${memory}
- }
- }
- }
-}
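Editor's note: the deleted jobspec template above mixes three interpolation styles that Terraform's templatefile() treats differently: ${...} (e.g. ${port}) is substituted at render time, $${...} (e.g. $${attr.cpu.arch}, $${NOMAD_ALLOC_INDEX}) is an escape that emits a literal ${...} for Nomad to interpolate at run time, and %{ if ... }/%{ endif } are template directives. A minimal, self-contained sketch of the same three syntaxes; the values are illustrative and not part of the repository, and a heredoc stands in for the .tftpl file, so the variables are referenced as local.* instead of bare names:

locals {
  port       = 9093
  use_canary = true

  # Renders to:
  #   static    = 9093
  #   attribute = "${attr.cpu.arch}"
  #   canary    = 1
  demo = <<-EOT
    static    = ${local.port}
    attribute = "$${attr.cpu.arch}"
    %{ if local.use_canary }canary    = 1%{ endif }
  EOT
}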
diff --git a/fdio.infra.terraform/1n_nmd/alertmanager/fdio/main.tf b/fdio.infra.terraform/1n_nmd/alertmanager/fdio/main.tf
deleted file mode 100644
index 745e450a8c..0000000000
--- a/fdio.infra.terraform/1n_nmd/alertmanager/fdio/main.tf
+++ /dev/null
@@ -1,14 +0,0 @@
-module "alertmanager" {
- providers = {
- nomad = nomad.yul1
- }
- source = "../"
-
- # alertmanager
- datacenters = ["yul1"]
- slack_jenkins_api_key = "TE07RD1V1/B01U1NV9HV3/hKZXJJ74g2JcISq4K3QC1eG9"
- slack_jenkins_channel = "fdio-jobs-monitoring"
- slack_default_api_key = "TE07RD1V1/B01UUK23B6C/hZTcCu42FUv8d6rtirHtcYIi"
- slack_default_channel = "fdio-infra-monitoring"
- am_version = "0.23.0"
-}
\ No newline at end of file
diff --git a/fdio.infra.terraform/1n_nmd/alertmanager/fdio/providers.tf b/fdio.infra.terraform/1n_nmd/alertmanager/fdio/providers.tf
deleted file mode 100644
index 42a6a45ce0..0000000000
--- a/fdio.infra.terraform/1n_nmd/alertmanager/fdio/providers.tf
+++ /dev/null
@@ -1,13 +0,0 @@
-provider "nomad" {
- address = var.nomad_provider_address
- alias = "yul1"
- # ca_file = var.nomad_provider_ca_file
- # cert_file = var.nomad_provider_cert_file
- # key_file = var.nomad_provider_key_file
-}
-
-provider "vault" {
- address = var.vault_provider_address
- skip_tls_verify = var.vault_provider_skip_tls_verify
- token = var.vault_provider_token
-}
\ No newline at end of file
diff --git a/fdio.infra.terraform/1n_nmd/alertmanager/fdio/variables.tf b/fdio.infra.terraform/1n_nmd/alertmanager/fdio/variables.tf
deleted file mode 100644
index 7d5be09d21..0000000000
--- a/fdio.infra.terraform/1n_nmd/alertmanager/fdio/variables.tf
+++ /dev/null
@@ -1,47 +0,0 @@
-variable "nomad_acl" {
- description = "Nomad ACLs enabled/disabled."
- type = bool
- default = false
-}
-
-variable "nomad_provider_address" {
- description = "FD.io Nomad cluster address."
- type = string
- default = "http://10.32.8.14:4646"
-}
-
-variable "nomad_provider_ca_file" {
- description = "A local file path to a PEM-encoded certificate authority."
- type = string
- default = "/etc/nomad.d/ssl/nomad-ca.pem"
-}
-
-variable "nomad_provider_cert_file" {
- description = "A local file path to a PEM-encoded certificate."
- type = string
- default = "/etc/nomad.d/ssl/nomad-cli.pem"
-}
-
-variable "nomad_provider_key_file" {
- description = "A local file path to a PEM-encoded private key."
- type = string
- default = "/etc/nomad.d/ssl/nomad-cli-key.pem"
-}
-
-variable "vault_provider_address" {
- description = "Vault cluster address."
- type = string
- default = "http://10.30.51.28:8200"
-}
-
-variable "vault_provider_skip_tls_verify" {
- description = "Verification of the Vault server's TLS certificate."
- type = bool
- default = false
-}
-
-variable "vault_provider_token" {
- description = "Vault root token."
- type = string
- sensitive = true
-}
\ No newline at end of file
diff --git a/fdio.infra.terraform/1n_nmd/alertmanager/fdio/versions.tf b/fdio.infra.terraform/1n_nmd/alertmanager/fdio/versions.tf
deleted file mode 100644
index 385c5c3f18..0000000000
--- a/fdio.infra.terraform/1n_nmd/alertmanager/fdio/versions.tf
+++ /dev/null
@@ -1,17 +0,0 @@
-terraform {
- backend "consul" {
- address = "10.32.8.14:8500"
- scheme = "http"
- path = "terraform/alertmanager"
- }
- required_providers {
- nomad = {
- source = "hashicorp/nomad"
- version = ">= 1.4.16"
- }
- vault = {
- version = ">= 3.2.1"
- }
- }
- required_version = ">= 1.1.4"
-}
\ No newline at end of file
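Editor's note: the Consul backend above hard-codes the state address and path. A hedged sketch, with a file name and workflow that are illustrative and not from this repository: the same settings can be supplied as a partial backend configuration at init time, leaving versions.tf with an empty backend "consul" {} block.

# consul.tfbackend (hypothetical file)
address = "10.32.8.14:8500"
scheme  = "http"
path    = "terraform/alertmanager"

# used as: terraform init -backend-config=consul.tfbackend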
diff --git a/fdio.infra.terraform/1n_nmd/alertmanager/main.tf b/fdio.infra.terraform/1n_nmd/alertmanager/main.tf
deleted file mode 100644
index e8a1389150..0000000000
--- a/fdio.infra.terraform/1n_nmd/alertmanager/main.tf
+++ /dev/null
@@ -1,48 +0,0 @@
-locals {
- datacenters = join(",", var.datacenters)
- url = join("",
- [
- "https://github.com",
- "/prometheus/alertmanager/releases/download/",
- "v${var.am_version}/",
- "alertmanager-${var.am_version}.linux-amd64.tar.gz"
- ]
- )
-}
-
-resource "nomad_job" "nomad_job_alertmanager" {
- jobspec = templatefile(
- "${path.module}/conf/nomad/alertmanager.hcl.tftpl",
- {
- auto_promote = var.auto_promote,
- auto_revert = var.auto_revert,
- canary = var.canary,
- cpu = var.cpu,
- datacenters = local.datacenters,
- group_count = var.group_count,
- job_name = var.job_name,
- max_parallel = var.max_parallel,
- memory = var.memory
- port = var.port,
- region = var.region,
- service_name = var.service_name,
- slack_jenkins_api_key = var.slack_jenkins_api_key,
- slack_jenkins_channel = var.slack_jenkins_channel,
- slack_jenkins_receiver = var.slack_jenkins_receiver,
- slack_default_api_key = var.slack_default_api_key,
- slack_default_channel = var.slack_default_channel,
- slack_default_receiver = var.slack_default_receiver,
- url = local.url,
- use_canary = var.use_canary,
- use_host_volume = var.use_host_volume,
- use_vault_provider = var.vault_secret.use_vault_provider,
- vault_kv_policy_name = var.vault_secret.vault_kv_policy_name,
- vault_kv_path = var.vault_secret.vault_kv_path,
- vault_kv_field_access_key = var.vault_secret.vault_kv_field_access_key,
- vault_kv_field_secret_key = var.vault_secret.vault_kv_field_secret_key,
- version = var.am_version,
- volume_destination = var.volume_destination,
- volume_source = var.volume_source
- })
- detach = false
-}
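Editor's note: a worked example of the join() in locals above: with am_version = "0.23.0" as set by the fdio wrapper, local.url evaluates to https://github.com/prometheus/alertmanager/releases/download/v0.23.0/alertmanager-0.23.0.linux-amd64.tar.gz, which the jobspec's artifact stanza downloads and the exec driver then runs as local/alertmanager-0.23.0.linux-amd64/alertmanager. A minimal sketch, not part of the original module, for confirming the resolved URL with terraform output:

output "alertmanager_artifact_url" {
  description = "Release tarball URL passed into the rendered jobspec."
  value       = local.url
}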
diff --git a/fdio.infra.terraform/1n_nmd/alertmanager/variables.tf b/fdio.infra.terraform/1n_nmd/alertmanager/variables.tf
deleted file mode 100644
index e452598fa6..0000000000
--- a/fdio.infra.terraform/1n_nmd/alertmanager/variables.tf
+++ /dev/null
@@ -1,157 +0,0 @@
-# Nomad
-variable "datacenters" {
- description = "Specifies the list of DCs to be considered placing this task"
- type = list(string)
- default = ["dc1"]
-}
-
-variable "region" {
- description = "Specifies the list of DCs to be considered placing this task"
- type = string
- default = "global"
-}
-
-variable "volume_source" {
- description = "The name of the volume to request"
- type = string
- default = "persistence"
-}
-
-# Alertmanager
-variable "am_version" {
- description = "Alertmanager version"
- type = string
- default = "0.21.0"
-}
-
-variable "auto_promote" {
- description = "Specifies if the job should auto-promote to the canary version"
- type = bool
- default = true
-}
-
-variable "auto_revert" {
- description = "Specifies if the job should auto-revert to the last stable job"
- type = bool
- default = true
-}
-
-variable "canary" {
- description = "Equal to the count of the task group allows blue/green depl."
- type = number
- default = 1
-}
-
-variable "cpu" {
- description = "CPU allocation"
- type = number
- default = 1000
-}
-
-variable "group_count" {
- description = "Specifies the number of the task groups running under this one"
- type = number
- default = 1
-}
-
-variable "job_name" {
- description = "Specifies a name for the job"
- type = string
- default = "alertmanager"
-}
-
-variable "max_parallel" {
- description = "Specifies the maximum number of updates to perform in parallel"
- type = number
- default = 1
-}
-
-variable "memory" {
- description = "Specifies the memory required in MB"
- type = number
- default = 1024
-}
-
-variable "port" {
- description = "Specifies the static TCP/UDP port to allocate"
- type = number
- default = 9093
-}
-
-variable "service_name" {
- description = "Specifies the name this service will be advertised in Consul"
- type = string
- default = "alertmanager"
-}
-
-variable "use_canary" {
- description = "Uses canary deployment"
- type = bool
- default = true
-}
-
-variable "use_host_volume" {
- description = "Use Nomad host volume feature"
- type = bool
- default = false
-}
-
-variable "vault_secret" {
- type = object({
- use_vault_provider = bool,
- vault_kv_policy_name = string,
- vault_kv_path = string,
- vault_kv_field_access_key = string,
- vault_kv_field_secret_key = string
- })
- description = "Set of properties to be able to fetch secret from vault."
- default = {
- use_vault_provider = false
- vault_kv_policy_name = "kv"
- vault_kv_path = "secret/data/alertmanager"
- vault_kv_field_access_key = "access_key"
- vault_kv_field_secret_key = "secret_key"
- }
-}
-
-variable "volume_destination" {
- description = "Specifies where the volume should be mounted inside the task"
- type = string
- default = "/data/"
-}
-
-variable "slack_jenkins_api_key" {
- description = "Alertmanager jenkins slack API key"
- type = string
- default = "XXXXXXXXX/XXXXXXXXXXX/XXXXXXXXXXXXXXXXXXXXXXXX"
-}
-
-variable "slack_jenkins_receiver" {
- description = "Alertmanager jenkins slack receiver"
- type = string
- default = "jenkins-slack-receiver"
-}
-
-variable "slack_jenkins_channel" {
- description = "Alertmanager jenkins slack channel"
- type = string
- default = "jenkins-channel"
-}
-
-variable "slack_default_api_key" {
- description = "Alertmanager default slack API key"
- type = string
- default = "XXXXXXXXX/XXXXXXXXXXX/XXXXXXXXXXXXXXXXXXXXXXXX"
-}
-
-variable "slack_default_receiver" {
- description = "Alertmanager default slack receiver"
- type = string
- default = "default-slack-receiver"
-}
-
-variable "slack_default_channel" {
- description = "Alertmanager default slack channel"
- type = string
- default = "default-channel"
-}
\ No newline at end of file
diff --git a/fdio.infra.terraform/1n_nmd/alertmanager/versions.tf b/fdio.infra.terraform/1n_nmd/alertmanager/versions.tf
deleted file mode 100644
index 5f283ed4ea..0000000000
--- a/fdio.infra.terraform/1n_nmd/alertmanager/versions.tf
+++ /dev/null
@@ -1,9 +0,0 @@
-terraform {
- required_providers {
- nomad = {
- source = "hashicorp/nomad"
- version = ">= 1.4.16"
- }
- }
- required_version = ">= 1.1.4"
-}
\ No newline at end of file
diff --git a/fdio.infra.terraform/1n_nmd/etl/conf/nomad/etl.hcl.tftpl b/fdio.infra.terraform/1n_nmd/etl/conf/nomad/etl.hcl.tftpl
deleted file mode 100644
index 208fb0a59f..0000000000
--- a/fdio.infra.terraform/1n_nmd/etl/conf/nomad/etl.hcl.tftpl
+++ /dev/null
@@ -1,318 +0,0 @@
-job "${job_name}" {
- # The "datacenters" parameter specifies the list of datacenters which should
- # be considered when placing this task. This must be provided.
- datacenters = "${datacenters}"
-
- # The "type" parameter controls the type of job, which impacts the scheduler's
- # decision on placement. For a full list of job types and their differences,
- # please see the online documentation.
- #
- # https://www.nomadproject.io/docs/jobspec/schedulers
- #
- type = "${type}"
-
- # The periodic stanza allows a job to run at fixed times, dates, or intervals.
- # The easiest way to think about the periodic scheduler is "Nomad cron" or
- # "distributed cron".
- #
- # https://www.nomadproject.io/docs/job-specification/periodic
- #
- periodic {
- cron = "${cron}"
- prohibit_overlap = "${prohibit_overlap}"
- time_zone = "${time_zone}"
- }
-
- # The "group" stanza defines a series of tasks that should be co-located on
- # the same Nomad client. Any task within a group will be placed on the same
- # client.
- #
- # https://www.nomadproject.io/docs/job-specification/group
- #
- group "${job_name}-master" {
- # The restart stanza configures a tasks's behavior on task failure. Restarts
- # happen on the client that is running the task.
- #
- # https://www.nomadproject.io/docs/job-specification/restart
- #
- restart {
- mode = "fail"
- }
-
- # The constraint allows restricting the set of eligible nodes. Constraints
- # may filter on attributes or client metadata.
- #
- # For more information and examples on the "volume" stanza, please see
- # the online documentation at:
- #
- # https://www.nomadproject.io/docs/job-specification/constraint
- #
- constraint {
- attribute = "$${attr.cpu.arch}"
- operator = "!="
- value = "arm64"
- }
-
- constraint {
- attribute = "$${node.class}"
- value = "builder"
- }
-
- # The "task" stanza creates an individual unit of work, such as a Docker
- # container, web application, or batch processing.
- #
- # https://www.nomadproject.io/docs/job-specification/task.html
- #
- task "${job_name}-trending" {
- # The artifact stanza instructs Nomad to fetch and unpack a remote
- # resource, such as a file, tarball, or binary.
- #
- # https://www.nomadproject.io/docs/job-specification/artifact
- #
- artifact {
- source = "git::https://github.com/FDio/csit"
- destination = "local/csit"
- }
-
- # The "driver" parameter specifies the task driver that should be used to
- # run the task.
- driver = "docker"
-
- # The "config" stanza specifies the driver configuration, which is passed
- # directly to the driver to start the task. The details of configurations
- # are specific to each driver, so please see specific driver
- # documentation for more information.
- config {
- image = "${image}"
- command = "gluesparksubmit"
- args = [
- "--driver-memory", "20g",
- "--executor-memory", "20g",
- "trending.py"
- ]
- work_dir = "/local/csit/csit.infra.etl"
- }
-
- # The env stanza configures a list of environment variables to populate
- # the task's environment before starting.
- env {
- AWS_ACCESS_KEY_ID = "${aws_access_key_id}"
- AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}"
- AWS_DEFAULT_REGION = "${aws_default_region}"
- OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}"
- OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
- OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}"
- ${ envs }
- }
-
- # The "resources" stanza describes the requirements a task needs to
- # execute. Resource requirements include memory, network, cpu, and more.
- # This ensures the task will execute on a machine that contains enough
- # resource capacity.
- #
- # https://www.nomadproject.io/docs/job-specification/resources
- #
- resources {
- cpu = ${cpu}
- memory = ${memory}
- }
- }
- task "${job_name}-stats" {
- # The artifact stanza instructs Nomad to fetch and unpack a remote
- # resource, such as a file, tarball, or binary.
- #
- # https://www.nomadproject.io/docs/job-specification/artifact
- #
- artifact {
- source = "git::https://github.com/FDio/csit"
- destination = "local/csit"
- }
-
- # The "driver" parameter specifies the task driver that should be used to
- # run the task.
- driver = "docker"
-
- # The "config" stanza specifies the driver configuration, which is passed
- # directly to the driver to start the task. The details of configurations
- # are specific to each driver, so please see specific driver
- # documentation for more information.
- config {
- image = "${image}"
- command = "gluesparksubmit"
- args = [
- "--driver-memory", "10g",
- "--executor-memory", "10g",
- "stats.py"
- ]
- work_dir = "/local/csit/csit.infra.etl"
- }
-
- # The env stanza configures a list of environment variables to populate
- # the task's environment before starting.
- env {
- AWS_ACCESS_KEY_ID = "${aws_access_key_id}"
- AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}"
- AWS_DEFAULT_REGION = "${aws_default_region}"
- OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}"
- OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
- OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}"
- ${ envs }
- }
-
- # The "resources" stanza describes the requirements a task needs to
- # execute. Resource requirements include memory, network, cpu, and more.
- # This ensures the task will execute on a machine that contains enough
- # resource capacity.
- #
- # https://www.nomadproject.io/docs/job-specification/resources
- #
- resources {
- cpu = ${cpu}
- memory = ${memory}
- }
- }
- }
- group "${job_name}-rls2202" {
- # The restart stanza configures a tasks's behavior on task failure. Restarts
- # happen on the client that is running the task.
- #
- # https://www.nomadproject.io/docs/job-specification/restart
- #
- restart {
- mode = "fail"
- }
-
- # The constraint allows restricting the set of eligible nodes. Constraints
- # may filter on attributes or client metadata.
- #
- # For more information and examples on the "volume" stanza, please see
- # the online documentation at:
- #
- # https://www.nomadproject.io/docs/job-specification/constraint
- #
- constraint {
- attribute = "$${attr.cpu.arch}"
- operator = "!="
- value = "arm64"
- }
-
- constraint {
- attribute = "$${node.class}"
- value = "builder"
- }
-
- # The "task" stanza creates an individual unit of work, such as a Docker
- # container, web application, or batch processing.
- #
- # https://www.nomadproject.io/docs/job-specification/task.html
- #
- task "${job_name}-coverage" {
- # The artifact stanza instructs Nomad to fetch and unpack a remote
- # resource, such as a file, tarball, or binary.
- #
- # https://www.nomadproject.io/docs/job-specification/artifact
- #
- artifact {
- source = "git::https://github.com/FDio/csit"
- destination = "local/csit"
- }
-
- # The "driver" parameter specifies the task driver that should be used to
- # run the task.
- driver = "docker"
-
- # The "config" stanza specifies the driver configuration, which is passed
- # directly to the driver to start the task. The details of configurations
- # are specific to each driver, so please see specific driver
- # documentation for more information.
- config {
- image = "${image}"
- command = "gluesparksubmit"
- args = [
- "--driver-memory", "20g",
- "--executor-memory", "20g",
- "coverage_rls2202.py"
- ]
- work_dir = "/local/csit/csit.infra.etl"
- }
-
- # The env stanza configures a list of environment variables to populate
- # the task's environment before starting.
- env {
- AWS_ACCESS_KEY_ID = "${aws_access_key_id}"
- AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}"
- AWS_DEFAULT_REGION = "${aws_default_region}"
- OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}"
- OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
- OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}"
- ${ envs }
- }
-
- # The "resources" stanza describes the requirements a task needs to
- # execute. Resource requirements include memory, network, cpu, and more.
- # This ensures the task will execute on a machine that contains enough
- # resource capacity.
- #
- # https://www.nomadproject.io/docs/job-specification/resources
- #
- resources {
- cpu = ${cpu}
- memory = ${memory}
- }
- }
- task "${job_name}-iterative" {
- # The artifact stanza instructs Nomad to fetch and unpack a remote
- # resource, such as a file, tarball, or binary.
- #
- # https://www.nomadproject.io/docs/job-specification/artifact
- #
- artifact {
- source = "git::https://github.com/FDio/csit"
- destination = "local/csit"
- }
-
- # The "driver" parameter specifies the task driver that should be used to
- # run the task.
- driver = "docker"
-
- # The "config" stanza specifies the driver configuration, which is passed
- # directly to the driver to start the task. The details of configurations
- # are specific to each driver, so please see specific driver
- # documentation for more information.
- config {
- image = "${image}"
- command = "gluesparksubmit"
- args = [
- "--driver-memory", "20g",
- "--executor-memory", "20g",
- "iterative_rls2202.py"
- ]
- work_dir = "/local/csit/csit.infra.etl"
- }
-
- # The env stanza configures a list of environment variables to populate
- # the task's environment before starting.
- env {
- AWS_ACCESS_KEY_ID = "${aws_access_key_id}"
- AWS_SECRET_ACCESS_KEY = "${aws_secret_access_key}"
- AWS_DEFAULT_REGION = "${aws_default_region}"
- OUT_AWS_ACCESS_KEY_ID = "${out_aws_access_key_id}"
- OUT_AWS_SECRET_ACCESS_KEY = "${out_aws_secret_access_key}"
- OUT_AWS_DEFAULT_REGION = "${out_aws_default_region}"
- ${ envs }
- }
-
- # The "resources" stanza describes the requirements a task needs to
- # execute. Resource requirements include memory, network, cpu, and more.
- # This ensures the task will execute on a machine that contains enough
- # resource capacity.
- #
- # https://www.nomadproject.io/docs/job-specification/resources
- #
- resources {
- cpu = ${cpu}
- memory = ${memory}
- }
- }
- }
-}
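Editor's note: the periodic stanza above launches the job on the given cron expression, and prohibit_overlap = true skips a launch while a previous run is still active. ${ envs } inside each env stanza is replaced verbatim with the string built in etl/main.tf (join("\n", var.envs)), so every list element must already be a complete HCL assignment. A hedged sketch with an illustrative value that the repository does not actually use:

# Hypothetical caller of the etl module.
module "etl" {
  source = "./etl"

  envs = [
    "AWS_STS_REGIONAL_ENDPOINTS=\"regional\"",
  ]
}

# After templatefile() each task's env stanza then contains the extra line:
#   AWS_STS_REGIONAL_ENDPOINTS="regional"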
diff --git a/fdio.infra.terraform/1n_nmd/etl/fdio/main.tf b/fdio.infra.terraform/1n_nmd/etl/fdio/main.tf
deleted file mode 100644
index 3d2026f0f9..0000000000
--- a/fdio.infra.terraform/1n_nmd/etl/fdio/main.tf
+++ /dev/null
@@ -1,23 +0,0 @@
-data "vault_generic_secret" "fdio_logs" {
- path = "kv/secret/data/etl/fdio_logs"
-}
-
-data "vault_generic_secret" "fdio_docs" {
- path = "kv/secret/data/etl/fdio_docs"
-}
-
-module "etl" {
- providers = {
- nomad = nomad.yul1
- }
- source = "../"
-
- aws_access_key_id = data.vault_generic_secret.fdio_logs.data["access_key"]
- aws_secret_access_key = data.vault_generic_secret.fdio_logs.data["secret_key"]
- aws_default_region = data.vault_generic_secret.fdio_logs.data["region"]
- out_aws_access_key_id = data.vault_generic_secret.fdio_docs.data["access_key"]
- out_aws_secret_access_key = data.vault_generic_secret.fdio_docs.data["secret_key"]
- out_aws_default_region = data.vault_generic_secret.fdio_docs.data["region"]
- cron = "@daily"
- datacenters = ["yul1"]
-}
diff --git a/fdio.infra.terraform/1n_nmd/etl/fdio/providers.tf b/fdio.infra.terraform/1n_nmd/etl/fdio/providers.tf
deleted file mode 100644
index c6617da02b..0000000000
--- a/fdio.infra.terraform/1n_nmd/etl/fdio/providers.tf
+++ /dev/null
@@ -1,13 +0,0 @@
-provider "nomad" {
- address = var.nomad_provider_address
- alias = "yul1"
- # ca_file = var.nomad_provider_ca_file
- # cert_file = var.nomad_provider_cert_file
- # key_file = var.nomad_provider_key_file
-}
-
-provider "vault" {
- address = var.vault_provider_address
- skip_tls_verify = var.vault_provider_skip_tls_verify
- token = var.vault_provider_token
-}
diff --git a/fdio.infra.terraform/1n_nmd/etl/fdio/variables.tf b/fdio.infra.terraform/1n_nmd/etl/fdio/variables.tf
deleted file mode 100644
index 0e0b3af622..0000000000
--- a/fdio.infra.terraform/1n_nmd/etl/fdio/variables.tf
+++ /dev/null
@@ -1,47 +0,0 @@
-variable "nomad_acl" {
- description = "Nomad ACLs enabled/disabled."
- type = bool
- default = false
-}
-
-variable "nomad_provider_address" {
- description = "FD.io Nomad cluster address."
- type = string
- default = "http://10.32.8.14:4646"
-}
-
-variable "nomad_provider_ca_file" {
- description = "A local file path to a PEM-encoded certificate authority."
- type = string
- default = "/etc/nomad.d/ssl/nomad-ca.pem"
-}
-
-variable "nomad_provider_cert_file" {
- description = "A local file path to a PEM-encoded certificate."
- type = string
- default = "/etc/nomad.d/ssl/nomad-cli.pem"
-}
-
-variable "nomad_provider_key_file" {
- description = "A local file path to a PEM-encoded private key."
- type = string
- default = "/etc/nomad.d/ssl/nomad-cli-key.pem"
-}
-
-variable "vault_provider_address" {
- description = "Vault cluster address."
- type = string
- default = "http://10.30.51.28:8200"
-}
-
-variable "vault_provider_skip_tls_verify" {
- description = "Verification of the Vault server's TLS certificate."
- type = bool
- default = false
-}
-
-variable "vault_provider_token" {
- description = "Vault root token."
- type = string
- sensitive = true
-}
diff --git a/fdio.infra.terraform/1n_nmd/etl/fdio/versions.tf b/fdio.infra.terraform/1n_nmd/etl/fdio/versions.tf
deleted file mode 100644
index 526e1d0df0..0000000000
--- a/fdio.infra.terraform/1n_nmd/etl/fdio/versions.tf
+++ /dev/null
@@ -1,17 +0,0 @@
-terraform {
- backend "consul" {
- address = "10.32.8.14:8500"
- scheme = "http"
- path = "terraform/etl"
- }
- required_providers {
- nomad = {
- source = "hashicorp/nomad"
- version = ">= 1.4.16"
- }
- vault = {
- version = ">= 3.2.1"
- }
- }
- required_version = ">= 1.1.4"
-}
diff --git a/fdio.infra.terraform/1n_nmd/etl/main.tf b/fdio.infra.terraform/1n_nmd/etl/main.tf
deleted file mode 100644
index c477da81a8..0000000000
--- a/fdio.infra.terraform/1n_nmd/etl/main.tf
+++ /dev/null
@@ -1,33 +0,0 @@
-locals {
- datacenters = join(",", var.datacenters)
- envs = join("\n", concat([], var.envs))
-}
-
-resource "nomad_job" "nomad_job_etl" {
- jobspec = templatefile(
- "${path.module}/conf/nomad/etl.hcl.tftpl",
- {
- aws_access_key_id = var.aws_access_key_id,
- aws_secret_access_key = var.aws_secret_access_key,
- aws_default_region = var.aws_default_region
- cpu = var.cpu,
- cron = var.cron,
- datacenters = local.datacenters,
- envs = local.envs,
- image = var.image,
- job_name = var.job_name,
- memory = var.memory,
- out_aws_access_key_id = var.out_aws_access_key_id,
- out_aws_secret_access_key = var.out_aws_secret_access_key,
- out_aws_default_region = var.out_aws_default_region
- prohibit_overlap = var.prohibit_overlap,
- time_zone = var.time_zone,
- type = var.type,
- use_vault_provider = var.vault_secret.use_vault_provider,
- vault_kv_policy_name = var.vault_secret.vault_kv_policy_name,
- vault_kv_path = var.vault_secret.vault_kv_path,
- vault_kv_field_access_key = var.vault_secret.vault_kv_field_access_key,
- vault_kv_field_secret_key = var.vault_secret.vault_kv_field_secret_key
- })
- detach = false
-}
diff --git a/fdio.infra.terraform/1n_nmd/etl/variables.tf b/fdio.infra.terraform/1n_nmd/etl/variables.tf
deleted file mode 100644
index 3c6c12a943..0000000000
--- a/fdio.infra.terraform/1n_nmd/etl/variables.tf
+++ /dev/null
@@ -1,115 +0,0 @@
-# Nomad
-variable "datacenters" {
- description = "Specifies the list of DCs to be considered placing this task."
- type = list(string)
- default = ["dc1"]
-}
-
-# ETL
-variable "aws_access_key_id" {
- description = "AWS access key."
- type = string
- default = "aws"
-}
-
-variable "aws_secret_access_key" {
- description = "AWS secret key"
- type = string
- default = "aws"
-}
-
-variable "aws_default_region" {
- description = "AWS region"
- type = string
- default = "aws"
-}
-
-variable "cpu" {
- description = "Specifies the CPU required to run this task in MHz."
- type = number
- default = 10000
-}
-
-variable "cron" {
- description = "Specifies a cron expression configuring the interval to launch."
- type = string
- default = "@daily"
-}
-
-variable "envs" {
- description = "Specifies ETL environment variables."
- type = list(string)
- default = []
-}
-
-variable "image" {
- description = "Specifies the Docker image to run."
- type = string
- default = "pmikus/docker-ubuntu-focal-aws-glue:latest"
-}
-
-variable "job_name" {
- description = "Specifies a name for the job."
- type = string
- default = "etl"
-}
-
-variable "memory" {
- description = "Specifies the memory required in MB."
- type = number
- default = 20000
-}
-
-variable "out_aws_access_key_id" {
- description = "AWS access key."
- type = string
- default = "aws"
-}
-
-variable "out_aws_secret_access_key" {
- description = "AWS secret key"
- type = string
- default = "aws"
-}
-
-variable "out_aws_default_region" {
- description = "AWS region"
- type = string
- default = "aws"
-}
-
-variable "prohibit_overlap" {
- description = "Specifies if this job should wait until previous completed."
- type = bool
- default = true
-}
-
-variable "time_zone" {
- description = "Specifies the time zone to evaluate the next launch interval."
- type = string
- default = "UTC"
-}
-
-variable "type" {
- description = "Specifies the Nomad scheduler to use."
- type = string
- default = "batch"
-}
-
-variable "vault_secret" {
- type = object({
- use_vault_provider = bool,
- vault_kv_policy_name = string,
- vault_kv_path = string,
- vault_kv_field_access_key = string,
- vault_kv_field_secret_key = string
- })
- description = "Set of properties to be able to fetch secret from vault."
- default = {
- use_vault_provider = false
- vault_kv_policy_name = "kv"
- vault_kv_path = "secret/data/etl"
- vault_kv_field_access_key = "access_key"
- vault_kv_field_secret_key = "secret_key"
- }
-}
diff --git a/fdio.infra.terraform/1n_nmd/etl/versions.tf b/fdio.infra.terraform/1n_nmd/etl/versions.tf
deleted file mode 100644
index a01708f28a..0000000000
--- a/fdio.infra.terraform/1n_nmd/etl/versions.tf
+++ /dev/null
@@ -1,9 +0,0 @@
-terraform {
- required_providers {
- nomad = {
- source = "hashicorp/nomad"
- version = ">= 1.4.16"
- }
- }
- required_version = ">= 1.1.4"
-}
diff --git a/fdio.infra.terraform/1n_nmd/main.tf b/fdio.infra.terraform/1n_nmd/main.tf
index 7cdd245b7a..24d5ff3efc 100644
--- a/fdio.infra.terraform/1n_nmd/main.tf
+++ b/fdio.infra.terraform/1n_nmd/main.tf
@@ -4,31 +4,6 @@
# and downstream modules can simply declare resources for that provider
# and have them automatically associated with the root provider
# configurations.
-module "grafana" {
- source = "./grafana"
- providers = {
- nomad = nomad.yul1
- }
-
- # nomad
- nomad_datacenters = ["yul1"]
-
- # grafana
- grafana_job_name = "prod-grafana"
- grafana_use_canary = true
- grafana_group_count = 1
- grafana_vault_secret = {
- use_vault_provider = false,
- vault_kv_policy_name = "kv-secret",
- vault_kv_path = "secret/data/grafana",
- vault_kv_field_access_key = "access_key",
- vault_kv_field_secret_key = "secret_key"
- }
- grafana_container_image = "grafana/grafana:7.3.7"
- grafana_cpu = 1000
- grafana_mem = 2048
- grafana_port = 3000
-}
#module "minio" {
# source = "./minio"
@@ -66,41 +41,6 @@ data "vault_generic_secret" "minio_creds" {
path = "kv/secret/data/minio"
}
-module "minio_s3_gateway" {
- source = "./minio_s3_gateway"
- providers = {
- nomad = nomad.yul1
- }
-
- # nomad
- datacenters = ["yul1"]
- volume_source = "prod-volume-data1-1"
-
- # minio
- job_name = "minio-s3-gateway"
- group_count = 4
- service_name = "minio"
- mode = "gateway"
- port_base = 9001
- port_console = 9002
- image = "minio/minio:latest"
- access_key = data.vault_generic_secret.minio_creds.data["access_key"]
- secret_key = data.vault_generic_secret.minio_creds.data["secret_key"]
- volume_destination = "/data/"
- use_host_volume = true
- use_canary = true
- envs = [
- "MINIO_BROWSER=\"off\"",
- "MINIO_CACHE=\"on\"",
- "MINIO_CACHE_DRIVES=\"/data/s3_cache1\"",
- "MINIO_CACHE_EXCLUDE=\"\"",
- "MINIO_CACHE_QUOTA=80",
- "MINIO_CACHE_AFTER=1",
- "MINIO_CACHE_WATERMARK_LOW=70",
- "MINIO_CACHE_WATERMARK_HIGH=90"
- ]
-}
-
#module "nginx" {
# source = "./nginx"
# providers = {
@@ -116,35 +56,6 @@ module "minio_s3_gateway" {
# nginx_use_host_volume = true
#}
-module "prometheus" {
- source = "./prometheus"
- providers = {
- nomad = nomad.yul1
- }
-
- # nomad
- nomad_datacenters = ["yul1"]
- nomad_host_volume = "prod-volume-data1-1"
-
- # prometheus
- prometheus_job_name = "prod-prometheus"
- prometheus_use_canary = true
- prometheus_group_count = 4
- prometheus_vault_secret = {
- use_vault_provider = false,
- vault_kv_policy_name = "kv-secret",
- vault_kv_path = "secret/data/prometheus",
- vault_kv_field_access_key = "access_key",
- vault_kv_field_secret_key = "secret_key"
- }
- prometheus_data_dir = "/data/"
- prometheus_use_host_volume = true
- prometheus_version = "2.28.1"
- prometheus_cpu = 2000
- prometheus_mem = 8192
- prometheus_port = 9090
-}
-
module "vpp_device" {
source = "./vpp_device"
providers = {
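Editor's note: dropping the grafana, minio_s3_gateway, and prometheus module blocks means the next terraform apply plans a destroy of everything those modules manage, here deregistering the corresponding Nomad jobs, which appears to be the intent of this change. If remote objects ever needed to survive such a removal, newer Terraform (1.7+) offers a config-driven alternative; this is a hedged sketch of that feature, not something used in this repository:

removed {
  from = module.prometheus

  lifecycle {
    # Drop the module's objects from state without destroying them.
    destroy = false
  }
}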
diff --git a/fdio.infra.terraform/1n_nmd/minio_s3_gateway/conf/nomad/minio.hcl b/fdio.infra.terraform/1n_nmd/minio_s3_gateway/conf/nomad/minio.hcl
deleted file mode 100644
index 6210040b0c..0000000000
--- a/fdio.infra.terraform/1n_nmd/minio_s3_gateway/conf/nomad/minio.hcl
+++ /dev/null
@@ -1,246 +0,0 @@
-job "${job_name}" {
- # The "region" parameter specifies the region in which to execute the job.
- # If omitted, this inherits the default region name of "global".
- # region = "${region}"
-
- # The "datacenters" parameter specifies the list of datacenters which should
- # be considered when placing this task. This must be provided.
- datacenters = "${datacenters}"
-
- # The "type" parameter controls the type of job, which impacts the scheduler's
- # decision on placement. This configuration is optional and defaults to
- # "service". For a full list of job types and their differences, please see
- # the online documentation.
- #
- # https://www.nomadproject.io/docs/jobspec/schedulers
- #
- type = "service"
-
- update {
- # The "max_parallel" parameter specifies the maximum number of updates to
- # perform in parallel. In this case, this specifies to update a single task
- # at a time.
- max_parallel = ${max_parallel}
-
- health_check = "checks"
-
- # The "min_healthy_time" parameter specifies the minimum time the allocation
- # must be in the healthy state before it is marked as healthy and unblocks
- # further allocations from being updated.
- min_healthy_time = "10s"
-
- # The "healthy_deadline" parameter specifies the deadline in which the
- # allocation must be marked as healthy after which the allocation is
- # automatically transitioned to unhealthy. Transitioning to unhealthy will
- # fail the deployment and potentially roll back the job if "auto_revert" is
- # set to true.
- healthy_deadline = "3m"
-
- # The "progress_deadline" parameter specifies the deadline in which an
- # allocation must be marked as healthy. The deadline begins when the first
- # allocation for the deployment is created and is reset whenever an allocation
- # as part of the deployment transitions to a healthy state. If no allocation
- # transitions to the healthy state before the progress deadline, the
- # deployment is marked as failed.
- progress_deadline = "10m"
-
-%{ if use_canary }
- # The "canary" parameter specifies that changes to the job that would result
- # in destructive updates should create the specified number of canaries
- # without stopping any previous allocations. Once the operator determines the
- # canaries are healthy, they can be promoted which unblocks a rolling update
- # of the remaining allocations at a rate of "max_parallel".
- #
- # Further, setting "canary" equal to the count of the task group allows
- # blue/green deployments. When the job is updated, a full set of the new
- # version is deployed and upon promotion the old version is stopped.
- canary = ${canary}
-
- # Specifies if the job should auto-promote to the canary version when all
- # canaries become healthy during a deployment. Defaults to false which means
- # canaries must be manually updated with the nomad deployment promote
- # command.
- auto_promote = ${auto_promote}
-
- # The "auto_revert" parameter specifies if the job should auto-revert to the
- # last stable job on deployment failure. A job is marked as stable if all the
- # allocations as part of its deployment were marked healthy.
- auto_revert = ${auto_revert}
-%{ endif }
- }
-
- # All groups in this job should be scheduled on different hosts.
- constraint {
- operator = "distinct_hosts"
- value = "true"
- }
-
- # The "group" stanza defines a series of tasks that should be co-located on
- # the same Nomad client. Any task within a group will be placed on the same
- # client.
- #
- # https://www.nomadproject.io/docs/job-specification/group
- #
- group "${job_name}-group-1" {
- # The "count" parameter specifies the number of the task groups that should
- # be running under this group. This value must be non-negative and defaults
- # to 1.
- count = ${group_count}
-
- # The volume stanza allows the group to specify that it requires a given
- # volume from the cluster. The key of the stanza is the name of the volume
- # as it will be exposed to task configuration.
- #
- # https://www.nomadproject.io/docs/job-specification/volume
- %{ if use_host_volume }
- volume "${job_name}-volume-1" {
- type = "host"
- read_only = false
- source = "${volume_source}"
- }
- %{ endif }
-
- # The restart stanza configures a tasks's behavior on task failure. Restarts
- # happen on the client that is running the task.
- #
- # https://www.nomadproject.io/docs/job-specification/restart
- #
- restart {
- interval = "30m"
- attempts = 40
- delay = "15s"
- mode = "delay"
- }
-
- # The network stanza specifies the networking requirements for the task
- # group, including the network mode and port allocations. When scheduling
- # jobs in Nomad they are provisioned across your fleet of machines along
- # with other jobs and services. Because you don't know in advance what host
- # your job will be provisioned on, Nomad will provide your tasks with
- # network configuration when they start up.
- #
- # https://www.nomadproject.io/docs/job-specification/network
- #
- network {
- port "base" {
- static = ${port_base}
- to = ${port_base}
- }
- port "console" {
- static = ${port_console}
- to = ${port_console}
- }
- }
-
- # The "task" stanza creates an individual unit of work, such as a Docker
- # container, web application, or batch processing.
- #
- # https://www.nomadproject.io/docs/job-specification/task.html
- #
- task "${job_name}-task-1" {
- # The "driver" parameter specifies the task driver that should be used to
- # run the task.
- driver = "exec"
-
- %{ if use_host_volume }
- volume_mount {
- volume = "${job_name}-volume-1"
- destination = "${volume_destination}"
- read_only = false
- }
- %{ endif }
-
- %{ if use_vault_provider }
- vault {
- policies = "${vault_kv_policy_name}"
- }
- %{ endif }
-
- # The "config" stanza specifies the driver configuration, which is passed
- # directly to the driver to start the task. The details of configurations
- # are specific to each driver, so please see specific driver
- # documentation for more information.
- config {
- args = [
- "${mode}", "s3",
- "-address", ":${port_base}",
- "-console-address", ":${port_console}"
- ]
- command = "local/minio"
- }
-
- # The artifact stanza instructs Nomad to fetch and unpack a remote resource,
- # such as a file, tarball, or binary. Nomad downloads artifacts using the
- # popular go-getter library, which permits downloading artifacts from a
- # variety of locations using a URL as the input source.
- #
- # For more information and examples on the "artifact" stanza, please see
- # the online documentation at:
- #
- # https://www.nomadproject.io/docs/job-specification/artifact
- #
- artifact {
- source = "https://dl.min.io/server/minio/release/linux-amd64/minio"
- }
-
- # The env stanza configures a list of environment variables to populate
- # the task's environment before starting.
- env {
-%{ if use_vault_provider }
-{{ with secret "${vault_kv_path}" }}
- MINIO_ROOT_USER = "{{ .Data.data.${vault_kv_field_access_key} }}"
- MINIO_ROOT_PASSWORD = "{{ .Data.data.${vault_kv_field_secret_key} }}"
-{{ end }}
-%{ else }
- MINIO_ROOT_USER = "${access_key}"
- MINIO_ROOT_PASSWORD = "${secret_key}"
- AWS_ACCESS_KEY_ID = "${access_key}"
- AWS_SECRET_ACCESS_KEY = "${secret_key}"
-%{ endif }
- ${ envs }
- }
-
- # The service stanza instructs Nomad to register a service with Consul.
- #
- # https://www.nomadproject.io/docs/job-specification/service
- #
- service {
- name = "${service_name}"
- port = "base"
- tags = [ "${service_name}$${NOMAD_ALLOC_INDEX}" ]
- check {
- name = "Min.io Server HTTP Check Live"
- type = "http"
- port = "base"
- protocol = "http"
- method = "GET"
- path = "/minio/health/live"
- interval = "10s"
- timeout = "2s"
- }
- check {
- name = "Min.io Server HTTP Check Ready"
- type = "http"
- port = "base"
- protocol = "http"
- method = "GET"
- path = "/minio/health/ready"
- interval = "10s"
- timeout = "2s"
- }
- }
-
- # The "resources" stanza describes the requirements a task needs to
- # execute. Resource requirements include memory, network, cpu, and more.
- # This ensures the task will execute on a machine that contains enough
- # resource capacity.
- #
- # https://www.nomadproject.io/docs/job-specification/resources
- #
- resources {
- cpu = ${cpu}
- memory = ${memory}
- }
- }
- }
-}
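Editor's note: with the values the root main.tf used to pass to this module (mode = "gateway", port_base = 9001, port_console = 9002), the config stanza above renders as follows, so each task runs the downloaded MinIO binary as an S3 gateway:

config {
  args = [
    "gateway", "s3",
    "-address", ":9001",
    "-console-address", ":9002"
  ]
  command = "local/minio"
}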
diff --git a/fdio.infra.terraform/1n_nmd/minio_s3_gateway/main.tf b/fdio.infra.terraform/1n_nmd/minio_s3_gateway/main.tf
deleted file mode 100644
index 2ae3cac9c2..0000000000
--- a/fdio.infra.terraform/1n_nmd/minio_s3_gateway/main.tf
+++ /dev/null
@@ -1,51 +0,0 @@
-locals {
- datacenters = join(",", var.datacenters)
- envs = join("\n", concat([], var.envs))
- upstreams = jsonencode(var.upstreams)
-}
-
-data "template_file" "nomad_job_minio" {
- template = file("${path.module}/conf/nomad/minio.hcl")
- vars = {
- access_key = var.access_key
- auto_promote = var.auto_promote
- auto_revert = var.auto_revert
- canary = var.canary
- cpu = var.cpu
- cpu_proxy = var.resource_proxy.cpu
- datacenters = local.datacenters
- envs = local.envs
- group_count = var.group_count
- host = var.host
- image = var.image
- job_name = var.job_name
- max_parallel = var.max_parallel
- memory = var.memory
- memory_proxy = var.resource_proxy.memory
- mode = var.mode
- port_base = var.port_base
- port_console = var.port_console
- region = var.region
- secret_key = var.secret_key
- service_name = var.service_name
- use_canary = var.use_canary
- use_host_volume = var.use_host_volume
- upstreams = local.upstreams
- use_vault_kms = var.kms_variables.use_vault_kms
- use_vault_provider = var.vault_secret.use_vault_provider
- vault_address = var.kms_variables.vault_address
- vault_kms_approle_kv = var.kms_variables.vault_kms_approle_kv
- vault_kms_key_name = var.kms_variables.vault_kms_key_name
- vault_kv_policy_name = var.vault_secret.vault_kv_policy_name
- vault_kv_path = var.vault_secret.vault_kv_path
- vault_kv_field_access_key = var.vault_secret.vault_kv_field_access_key
- vault_kv_field_secret_key = var.vault_secret.vault_kv_field_secret_key
- volume_destination = var.volume_destination
- volume_source = var.volume_source
- }
-}
-
-resource "nomad_job" "nomad_job_minio" {
- jobspec = data.template_file.nomad_job_minio.rendered
- detach = false
-}
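Editor's note: unlike the alertmanager and etl modules, which call the built-in templatefile() function, this module renders its jobspec through the data "template_file" data source from the hashicorp/template provider, which is deprecated and archived. A minimal sketch of the templatefile() equivalent; the vars map is elided and would carry the same key/value pairs as the deleted data source:

resource "nomad_job" "nomad_job_minio" {
  jobspec = templatefile(
    "${path.module}/conf/nomad/minio.hcl",
    {
      # Same assignments as the vars map above, for example:
      datacenters = local.datacenters
      job_name    = var.job_name
      # ...remaining variables unchanged...
    }
  )
  detach = false
}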
diff --git a/fdio.infra.terraform/1n_nmd/minio_s3_gateway/variables.tf b/fdio.infra.terraform/1n_nmd/minio_s3_gateway/variables.tf
deleted file mode 100644
index 6fb351df26..0000000000
--- a/fdio.infra.terraform/1n_nmd/minio_s3_gateway/variables.tf
+++ /dev/null
@@ -1,199 +0,0 @@
-# Nomad
-
-variable "datacenters" {
- description = "Specifies the list of datacenters to be considered when placing this task"
- type = list(string)
- default = ["dc1"]
-}
-
-variable "region" {
- description = "Specifies the region in which to execute the job"
- type = string
- default = "global"
-}
-
-variable "volume_source" {
- description = "The name of the volume to request"
- type = string
- default = "persistence"
-}
-
-# Minio
-variable "access_key" {
- description = "Minio access key"
- type = string
- default = "minio"
-}
-
-variable "auto_promote" {
- description = "Specifies if the job should auto-promote to the canary version"
- type = bool
- default = true
-}
-
-variable "auto_revert" {
- description = "Specifies if the job should auto-revert to the last stable job"
- type = bool
- default = true
-}
-
-variable "canary" {
- description = "Setting canary equal to the task group count allows blue/green deployment"
- type = number
- default = 1
-}
-
-variable "cpu" {
- description = "Specifies the CPU required to run this task in MHz"
- type = number
- default = 1000
-}
-
-variable "envs" {
- description = "Minio environment variables"
- type = list(string)
- default = []
-}
-
-variable "group_count" {
- description = "Specifies the number of instances of the task group to run"
- type = number
- default = 1
-}
-
-variable "host" {
- description = "Minio host"
- type = string
- default = "127.0.0.1"
-}
-
-variable "image" {
- description = "The Docker image to run"
- type = string
- default = "minio/minio:latest"
-}
-
-variable "job_name" {
- description = "Specifies a name for the job"
- type = string
- default = "minio"
-}
-
-variable "kms_variables" {
- type = object({
- use_vault_kms = bool,
- vault_address = string,
- vault_kms_approle_kv = string,
- vault_kms_key_name = string
- })
- description = "Set of properties required to use the Vault transit secrets engine (KMS)"
- default = {
- use_vault_kms = false
- vault_address = "",
- vault_kms_approle_kv = "",
- vault_kms_key_name = ""
- }
-}
-
-variable "max_parallel" {
- description = "Specifies the maximum number of updates to perform in parallel"
- type = number
- default = 1
-}
-
-variable "memory" {
- description = "Specifies the memory required in MB"
- type = number
- default = 1024
-}
-
-variable "mode" {
- description = "Specifies the Minio mode"
- type = string
- default = "server"
-}
-
-variable "port_base" {
- description = "Specifies the static TCP/UDP port to allocate"
- type = number
- default = 9000
-}
-
-variable "port_console" {
- description = "Specifies the static TCP/UDP port to allocate for the console"
- type = number
- default = 9001
-}
-
-variable "resource_proxy" {
- description = "Minio proxy resources"
- type = object({
- cpu = number,
- memory = number
- })
- default = {
- cpu = 2000,
- memory = 1024
- }
- validation {
- condition = var.resource_proxy.cpu >= 200 && var.resource_proxy.memory >= 128
- error_message = "Proxy resource must be at least: cpu=200, memory=128."
- }
-}
-
-variable "service_name" {
- description = "Specifies the name under which this service will be advertised in Consul"
- type = string
- default = "minio"
-}
-
-variable "secret_key" {
- description = "Minio secret key"
- type = string
- default = "minio"
-}
-
-variable "upstreams" {
- type = list(object({
- service_name = string,
- port = number,
- }))
- description = "List of upstream services"
- default = []
-}
-
-variable "use_canary" {
- description = "Uses canary deployment for Minio"
- type = bool
- default = false
-}
-
-variable "use_host_volume" {
- description = "Use Nomad host volume feature"
- type = bool
- default = false
-}
-
-variable "vault_secret" {
- type = object({
- use_vault_provider = bool,
- vault_kv_policy_name = string,
- vault_kv_path = string,
- vault_kv_field_access_key = string,
- vault_kv_field_secret_key = string
- })
- description = "Set of properties required to fetch secrets from Vault"
- default = {
- use_vault_provider = false
- vault_kv_policy_name = "kv"
- vault_kv_path = "secret/data/minio"
- vault_kv_field_access_key = "access_key"
- vault_kv_field_secret_key = "secret_key"
- }
-}
-
-variable "volume_destination" {
- description = "Specifies where the volume should be mounted inside the task"
- type = string
- default = "/data/"
-}
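
Because vault_secret is a plain object variable with a single default, overriding it from a caller replaces the whole object rather than merging attributes, so every field must be restated. A hypothetical invocation enabling the Vault-backed credentials (module path and values are placeholders, not taken from this repository):

module "minio_s3_gateway" {
  source      = "../minio_s3_gateway"
  datacenters = ["yul1"]

  # All five attributes are restated even though only use_vault_provider
  # differs from the module default.
  vault_secret = {
    use_vault_provider        = true
    vault_kv_policy_name      = "kv"
    vault_kv_path             = "secret/data/minio"
    vault_kv_field_access_key = "access_key"
    vault_kv_field_secret_key = "secret_key"
  }
}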
diff --git a/fdio.infra.terraform/1n_nmd/minio_s3_gateway/versions.tf b/fdio.infra.terraform/1n_nmd/minio_s3_gateway/versions.tf
deleted file mode 100644
index b80610a525..0000000000
--- a/fdio.infra.terraform/1n_nmd/minio_s3_gateway/versions.tf
+++ /dev/null
@@ -1,13 +0,0 @@
-terraform {
- required_providers {
- nomad = {
- source = "hashicorp/nomad"
- version = "~> 1.4.15"
- }
- template = {
- source = "hashicorp/template"
- version = "~> 2.2.0"
- }
- }
- required_version = ">= 1.0.3"
-}
diff --git a/fdio.infra.terraform/1n_nmd/prometheus/conf/nomad/prometheus.hcl.tftpl b/fdio.infra.terraform/1n_nmd/prometheus/conf/nomad/prometheus.hcl.tftpl
deleted file mode 100644
index 224f7e5e00..0000000000
--- a/fdio.infra.terraform/1n_nmd/prometheus/conf/nomad/prometheus.hcl.tftpl
+++ /dev/null
@@ -1,624 +0,0 @@
-job "${job_name}" {
- # The "region" parameter specifies the region in which to execute the job.
- # If omitted, this inherits the default region name of "global".
- # region = "${region}"
-
- # The "datacenters" parameter specifies the list of datacenters which should
- # be considered when placing this task. This must be provided.
- datacenters = "${datacenters}"
-
- # The "type" parameter controls the type of job, which impacts the scheduler's
- # decision on placement. This configuration is optional and defaults to
- # "service". For a full list of job types and their differences, please see
- # the online documentation.
- #
- # https://www.nomadproject.io/docs/jobspec/schedulers
- #
- type = "service"
-
- update {
- # The "max_parallel" parameter specifies the maximum number of updates to
- # perform in parallel. In this case, this specifies to update a single task
- # at a time.
- max_parallel = ${max_parallel}
-
- health_check = "checks"
-
- # The "min_healthy_time" parameter specifies the minimum time the allocation
- # must be in the healthy state before it is marked as healthy and unblocks
- # further allocations from being updated.
- min_healthy_time = "10s"
-
- # The "healthy_deadline" parameter specifies the deadline in which the
- # allocation must be marked as healthy after which the allocation is
- # automatically transitioned to unhealthy. Transitioning to unhealthy will
- # fail the deployment and potentially roll back the job if "auto_revert" is
- # set to true.
- healthy_deadline = "3m"
-
- # The "progress_deadline" parameter specifies the deadline in which an
- # allocation must be marked as healthy. The deadline begins when the first
- # allocation for the deployment is created and is reset whenever an allocation
- # as part of the deployment transitions to a healthy state. If no allocation
- # transitions to the healthy state before the progress deadline, the
- # deployment is marked as failed.
- progress_deadline = "10m"
-
-%{ if use_canary }
- # The "canary" parameter specifies that changes to the job that would result
- # in destructive updates should create the specified number of canaries
- # without stopping any previous allocations. Once the operator determines the
- # canaries are healthy, they can be promoted which unblocks a rolling update
- # of the remaining allocations at a rate of "max_parallel".
- #
- # Further, setting "canary" equal to the count of the task group allows
- # blue/green deployments. When the job is updated, a full set of the new
- # version is deployed and upon promotion the old version is stopped.
- canary = ${canary}
-
- # Specifies if the job should auto-promote to the canary version when all
- # canaries become healthy during a deployment. Defaults to false which means
- # canaries must be manually updated with the nomad deployment promote
- # command.
- auto_promote = ${auto_promote}
-
- # The "auto_revert" parameter specifies if the job should auto-revert to the
- # last stable job on deployment failure. A job is marked as stable if all the
- # allocations as part of its deployment were marked healthy.
- auto_revert = ${auto_revert}
-%{ endif }
- }
-
- # The "group" stanza defines a series of tasks that should be co-located on
- # the same Nomad client. Any task within a group will be placed on the same
- # client.
- #
- # https://www.nomadproject.io/docs/job-specification/group
- #
- group "${job_name}-group-1" {
- # The "count" parameter specifies the number of the task groups that should
- # be running under this group. This value must be non-negative and defaults
- # to 1.
- count = ${group_count}
-
- # The volume stanza allows the group to specify that it requires a given
- # volume from the cluster. The key of the stanza is the name of the volume
- # as it will be exposed to task configuration.
- #
- # https://www.nomadproject.io/docs/job-specification/volume
- %{ if use_host_volume }
- volume "${job_name}-volume-1" {
- type = "host"
- read_only = false
- source = "${volume_source}"
- }
- %{ endif }
-
- # The restart stanza configures a task's behavior on task failure. Restarts
- # happen on the client that is running the task.
- #
- # https://www.nomadproject.io/docs/job-specification/restart
- #
- restart {
- interval = "30m"
- attempts = 40
- delay = "15s"
- mode = "delay"
- }
-
- # The constraint allows restricting the set of eligible nodes. Constraints
- # may filter on attributes or client metadata.
- #
- # https://www.nomadproject.io/docs/job-specification/constraint
- #
- constraint {
- attribute = "$${attr.cpu.arch}"
- operator = "!="
- value = "arm64"
- }
-
- constraint {
- attribute = "$${node.class}"
- value = "builder"
- }
-
- # The network stanza specifies the networking requirements for the task
- # group, including the network mode and port allocations. When scheduling
- # jobs in Nomad they are provisioned across your fleet of machines along
- # with other jobs and services. Because you don't know in advance what host
- # your job will be provisioned on, Nomad will provide your tasks with
- # network configuration when they start up.
- #
- # https://www.nomadproject.io/docs/job-specification/network
- #
- network {
- port "${service_name}" {
- static = ${port}
- to = ${port}
- }
- }
-
- # The "task" stanza creates an individual unit of work, such as a Docker
- # container, web application, or batch processing.
- #
- # https://www.nomadproject.io/docs/job-specification/task
- #
- task "${job_name}-task-1" {
- # The "driver" parameter specifies the task driver that should be used to
- # run the task.
- driver = "exec"
-
- %{ if use_host_volume }
- volume_mount {
- volume = "${job_name}-volume-1"
- destination = "${volume_destination}"
- read_only = false
- }
- %{ endif }
-
- %{ if use_vault_provider }
- vault {
- policies = "${vault_kv_policy_name}"
- }
- %{ endif }
-
- # The "config" stanza specifies the driver configuration, which is passed
- # directly to the driver to start the task. The details of configurations
- # are specific to each driver, so please see specific driver
- # documentation for more information.
- config {
- command = "local/prometheus-${version}.linux-amd64/prometheus"
- args = [
- "--config.file=secrets/prometheus.yml",
- "--storage.tsdb.path=${volume_destination}prometheus/",
- "--storage.tsdb.retention.time=7d"
- ]
- }
-
- # The artifact stanza instructs Nomad to fetch and unpack a remote resource,
- # such as a file, tarball, or binary. Nomad downloads artifacts using the
- # popular go-getter library, which permits downloading artifacts from a
- # variety of locations using a URL as the input source.
- #
- # https://www.nomadproject.io/docs/job-specification/artifact
- #
- artifact {
- source = "${url}"
- }
-
- # The "template" stanza instructs Nomad to manage a template, such as
- # a configuration file or script. This template can optionally pull data
- # from Consul or Vault to populate runtime configuration data.
- #
- # https://www.nomadproject.io/docs/job-specification/template
- #
- template {
- change_mode = "noop"
- change_signal = "SIGINT"
- destination = "secrets/alerts.yml"
- left_delimiter = "{{{"
- right_delimiter = "}}}"
- data = <<EOH
----
-groups:
-- name: "Jenkins Job Health Exporter"
- rules:
- - alert: JenkinsJobHealthExporterFailures
- expr: jenkins_job_failure{id=~".*"} > jenkins_job_success{id=~".*"}
- for: 0m
- labels:
- severity: critical
- annotations:
- summary: "Jenkins Job Health detected high failure rate on jenkins jobs."
- description: "Job: {{ $labels.id }}"
- - alert: JenkinsJobHealthExporterUnstable
- expr: jenkins_job_unstable{id=~".*"} > jenkins_job_success{id=~".*"}
- for: 0m
- labels:
- severity: warning
- annotations:
- summary: "Jenkins Job Health detected high unstable rate on jenkins jobs."
- description: "Job: {{ $labels.id }}"
-- name: "Consul"
- rules:
- - alert: ConsulServiceHealthcheckFailed
- expr: consul_catalog_service_node_healthy == 0
- for: 0m
- labels:
- severity: critical
- annotations:
- summary: "Consul service healthcheck failed (instance {{ $labels.instance }})."
- description: "Service: `{{ $labels.service_name }}` Healthcheck: `{{ $labels.service_id }}`."
- - alert: ConsulMissingMasterNode
- expr: consul_raft_peers < 3
- for: 0m
- labels:
- severity: critical
- annotations:
- summary: "Consul missing master node (instance {{ $labels.instance }})."
- description: "Number of Consul raft peers should be 3 in order to preserve quorum."
- - alert: ConsulAgentUnhealthy
- expr: consul_health_node_status{status="critical"} == 1
- for: 0m
- labels:
- severity: critical
- annotations:
- summary: "Consul agent unhealthy (instance {{ $labels.instance }})."
- description: "A Consul agent is down."
-- name: "Hosts"
- rules:
- - alert: NodeDown
- expr: up == 0
- for: 0m
- labels:
- severity: critical
- annotations:
- summary: "Prometheus target missing (instance {{ $labels.instance }})."
- description: "A Prometheus target has disappeared. An exporter might have crashed."
- - alert: HostOutOfMemory
- expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
- for: 2m
- labels:
- severity: warning
- annotations:
- summary: "Host out of memory (instance {{ $labels.instance }})."
- description: "Node memory is filling up (< 10% left)."
- - alert: HostOomKillDetected
- expr: increase(node_vmstat_oom_kill[1m]) > 0
- for: 0m
- labels:
- severity: warning
- annotations:
- summary: "Host OOM kill detected (instance {{ $labels.instance }})."
- description: "OOM kill detected."
- - alert: HostMemoryUnderMemoryPressure
- expr: rate(node_vmstat_pgmajfault[1m]) > 1000
- for: 2m
- labels:
- severity: warning
- annotations:
- summary: "Host memory under memory pressure (instance {{ $labels.instance }})."
- description: "The node is under heavy memory pressure. High rate of major page faults."
- - alert: HostOutOfDiskSpace
- expr: (node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes < 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0
- for: 2m
- labels:
- severity: warning
- annotations:
- summary: "Host out of disk space (instance {{ $labels.instance }})."
- description: "Disk is almost full (< 10% left)."
- - alert: HostRaidDiskFailure
- expr: node_md_disks{state="failed"} > 0
- for: 2m
- labels:
- severity: warning
- annotations:
- summary: "Host RAID disk failure (instance {{ $labels.instance }})."
- description: "At least one device in RAID array on {{ $labels.instance }} failed. Array {{ $labels.md_device }} needs attention and possibly a disk swap."
- - alert: HostConntrackLimit
- expr: node_nf_conntrack_entries / node_nf_conntrack_entries_limit > 0.8
- for: 5m
- labels:
- severity: warning
- annotations:
- summary: "Host conntrack limit (instance {{ $labels.instance }})."
- description: "The number of conntrack entries is approaching the limit."
- - alert: HostNetworkInterfaceSaturated
- expr: (rate(node_network_receive_bytes_total{device!~"^tap.*"}[1m]) + rate(node_network_transmit_bytes_total{device!~"^tap.*"}[1m])) / node_network_speed_bytes{device!~"^tap.*"} > 0.8
- for: 1m
- labels:
- severity: warning
- annotations:
- summary: "Host Network Interface Saturated (instance {{ $labels.instance }})."
- description: "The network interface {{ $labels.interface }} on {{ $labels.instance }} is getting overloaded."
- - alert: HostSystemdServiceCrashed
- expr: node_systemd_unit_state{state="failed"} == 1
- for: 0m
- labels:
- severity: warning
- annotations:
- summary: "Host SystemD service crashed (instance {{ $labels.instance }})."
- description: "SystemD service crashed."
- - alert: HostEdacCorrectableErrorsDetected
- expr: increase(node_edac_correctable_errors_total[1m]) > 0
- for: 0m
- labels:
- severity: info
- annotations:
- summary: "Host EDAC Correctable Errors detected (instance {{ $labels.instance }})."
- description: '{{ $labels.instance }} has had {{ printf "%.0f" $value }} correctable memory errors reported by EDAC in the last minute.'
- - alert: HostEdacUncorrectableErrorsDetected
- expr: node_edac_uncorrectable_errors_total > 0
- for: 0m
- labels:
- severity: warning
- annotations:
- summary: "Host EDAC Uncorrectable Errors detected (instance {{ $labels.instance }})."
- description: '{{ $labels.instance }} has had {{ printf "%.0f" $value }} uncorrectable memory errors reported by EDAC.'
-- name: "Min.io"
- rules:
- - alert: MinioDiskOffline
- expr: minio_offline_disks > 0
- for: 0m
- labels:
- severity: critical
- annotations:
- summary: "Minio disk offline (instance {{ $labels.instance }})"
- description: "Minio disk is offline."
- - alert: MinioStorageSpaceExhausted
- expr: minio_disk_storage_free_bytes / 1024 / 1024 / 1024 < 10
- for: 2m
- labels:
- severity: warning
- annotations:
- summary: "Minio storage space exhausted (instance {{ $labels.instance }})."
- description: "Minio storage space is low (< 10 GB)."
-- name: "Prometheus"
- rules:
- - alert: PrometheusConfigurationReloadFailure
- expr: prometheus_config_last_reload_successful != 1
- for: 0m
- labels:
- severity: warning
- annotations:
- summary: "Prometheus configuration reload failure (instance {{ $labels.instance }})."
- description: "Prometheus configuration reload error."
- - alert: PrometheusTooManyRestarts
- expr: changes(process_start_time_seconds{job=~"prometheus|pushgateway|alertmanager"}[15m]) > 2
- for: 0m
- labels:
- severity: warning
- annotations:
- summary: "Prometheus too many restarts (instance {{ $labels.instance }})."
- description: "Prometheus has restarted more than twice in the last 15 minutes. It might be crashlooping."
- - alert: PrometheusAlertmanagerConfigurationReloadFailure
- expr: alertmanager_config_last_reload_successful != 1
- for: 0m
- labels:
- severity: warning
- annotations:
- summary: "Prometheus AlertManager configuration reload failure (instance {{ $labels.instance }})."
- description: "AlertManager configuration reload error."
- - alert: PrometheusRuleEvaluationFailures
- expr: increase(prometheus_rule_evaluation_failures_total[3m]) > 0
- for: 0m
- labels:
- severity: critical
- annotations:
- summary: "Prometheus rule evaluation failures (instance {{ $labels.instance }})."
- description: "Prometheus encountered {{ $value }} rule evaluation failures, leading to potentially ignored alerts."
- - alert: PrometheusTargetScrapingSlow
- expr: prometheus_target_interval_length_seconds{quantile="0.9"} > 60
- for: 5m
- labels:
- severity: warning
- annotations:
- summary: "Prometheus target scraping slow (instance {{ $labels.instance }})."
- description: "Prometheus is scraping exporters slowly."
- - alert: PrometheusTsdbCompactionsFailed
- expr: increase(prometheus_tsdb_compactions_failed_total[1m]) > 0
- for: 0m
- labels:
- severity: critical
- annotations:
- summary: "Prometheus TSDB compactions failed (instance {{ $labels.instance }})."
- description: "Prometheus encountered {{ $value }} TSDB compactions failures."
- - alert: PrometheusTsdbHeadTruncationsFailed
- expr: increase(prometheus_tsdb_head_truncations_failed_total[1m]) > 0
- for: 0m
- labels:
- severity: critical
- annotations:
- summary: "Prometheus TSDB head truncations failed (instance {{ $labels.instance }})."
- description: "Prometheus encountered {{ $value }} TSDB head truncation failures."
- - alert: PrometheusTsdbWalCorruptions
- expr: increase(prometheus_tsdb_wal_corruptions_total[1m]) > 0
- for: 0m
- labels:
- severity: critical
- annotations:
- summary: "Prometheus TSDB WAL corruptions (instance {{ $labels.instance }})."
- description: "Prometheus encountered {{ $value }} TSDB WAL corruptions."
- - alert: PrometheusTsdbWalTruncationsFailed
- expr: increase(prometheus_tsdb_wal_truncations_failed_total[1m]) > 0
- for: 0m
- labels:
- severity: critical
- annotations:
- summary: "Prometheus TSDB WAL truncations failed (instance {{ $labels.instance }})."
- description: "Prometheus encountered {{ $value }} TSDB WAL truncation failures."
-EOH
- }
-
- template {
- change_mode = "noop"
- change_signal = "SIGINT"
- destination = "secrets/prometheus.yml"
- data = <<EOH
----
-global:
- scrape_interval: 5s
- scrape_timeout: 5s
- evaluation_interval: 5s
-
-alerting:
- alertmanagers:
- - consul_sd_configs:
- - server: '{{ env "NOMAD_IP_prometheus" }}:8500'
- services: [ 'alertmanager' ]
-
-rule_files:
- - 'alerts.yml'
-
-scrape_configs:
-
- - job_name: 'Nomad Cluster'
- consul_sd_configs:
- - server: '{{ env "NOMAD_IP_prometheus" }}:8500'
- services: [ 'nomad-client', 'nomad' ]
- relabel_configs:
- - source_labels: [__meta_consul_tags]
- regex: '(.*)http(.*)'
- action: keep
- metrics_path: /v1/metrics
- params:
- format: [ 'prometheus' ]
-
- - job_name: 'Consul Cluster'
- static_configs:
- - targets: [ '10.30.51.22:8500' ]
- - targets: [ '10.30.51.24:8500' ]
- - targets: [ '10.30.51.25:8500' ]
- - targets: [ '10.30.51.26:8500' ]
- - targets: [ '10.30.51.28:8500' ]
- - targets: [ '10.30.51.29:8500' ]
- - targets: [ '10.30.51.30:8500' ]
- - targets: [ '10.30.51.39:8500' ]
- - targets: [ '10.30.51.40:8500' ]
- - targets: [ '10.30.51.50:8500' ]
- - targets: [ '10.30.51.51:8500' ]
- - targets: [ '10.30.51.65:8500' ]
- - targets: [ '10.30.51.66:8500' ]
- - targets: [ '10.30.51.67:8500' ]
- - targets: [ '10.30.51.68:8500' ]
- - targets: [ '10.30.51.70:8500' ]
- - targets: [ '10.30.51.71:8500' ]
- - targets: [ '10.32.8.14:8500' ]
- - targets: [ '10.32.8.15:8500' ]
- - targets: [ '10.32.8.16:8500' ]
- - targets: [ '10.32.8.17:8500' ]
- metrics_path: /v1/agent/metrics
- params:
- format: [ 'prometheus' ]
-
- - job_name: 'Blackbox Exporter (icmp)'
- static_configs:
- - targets: [ 'gerrit.fd.io' ]
- - targets: [ 'jenkins.fd.io' ]
- - targets: [ '10.32.8.17' ]
- params:
- module: [ 'icmp_v4' ]
- relabel_configs:
- - source_labels: [__address__]
- target_label: __param_target
- - source_labels: [__param_target]
- target_label: instance
- - target_label: __address__
- replacement: localhost:9115
- metrics_path: /probe
-
- - job_name: 'Blackbox Exporter (http)'
- static_configs:
- - targets: [ 'gerrit.fd.io' ]
- - targets: [ 'jenkins.fd.io' ]
- params:
- module: [ 'http_2xx' ]
- relabel_configs:
- - source_labels: [__address__]
- target_label: __param_target
- - source_labels: [__param_target]
- target_label: instance
- - target_label: __address__
- replacement: localhost:9115
- metrics_path: /probe
-
- - job_name: 'Jenkins Job Health Exporter'
- static_configs:
- - targets: [ '10.30.51.22:9186' ]
- metric_relabel_configs:
- - source_labels: [ __name__ ]
- regex: '^(vpp.*|csit.*)_(success|failure|total|unstable|reqtime_ms)$'
- action: replace
- replacement: '$1'
- target_label: id
- - source_labels: [ __name__ ]
- regex: '^(vpp.*|csit.*)_(success|failure|total|unstable|reqtime_ms)$'
- replacement: 'jenkins_job_$2'
- target_label: __name__
-
- - job_name: 'Node Exporter'
- static_configs:
- - targets: [ '10.30.51.22:9100' ]
- - targets: [ '10.30.51.24:9100' ]
- - targets: [ '10.30.51.25:9100' ]
- - targets: [ '10.30.51.26:9100' ]
- - targets: [ '10.30.51.28:9100' ]
- - targets: [ '10.30.51.29:9100' ]
- - targets: [ '10.30.51.30:9100' ]
- - targets: [ '10.30.51.39:9100' ]
- - targets: [ '10.30.51.40:9100' ]
- - targets: [ '10.30.51.50:9100' ]
- - targets: [ '10.30.51.51:9100' ]
- - targets: [ '10.30.51.65:9100' ]
- - targets: [ '10.30.51.66:9100' ]
- - targets: [ '10.30.51.67:9100' ]
- - targets: [ '10.30.51.68:9100' ]
- - targets: [ '10.30.51.70:9100' ]
- - targets: [ '10.30.51.71:9100' ]
- - targets: [ '10.32.8.14:9100' ]
- - targets: [ '10.32.8.15:9100' ]
- - targets: [ '10.32.8.16:9100' ]
- - targets: [ '10.32.8.17:9100' ]
-
- - job_name: 'Alertmanager'
- consul_sd_configs:
- - server: '{{ env "NOMAD_IP_prometheus" }}:8500'
- services: [ 'alertmanager' ]
-
- - job_name: 'Grafana'
- consul_sd_configs:
- - server: '{{ env "NOMAD_IP_prometheus" }}:8500'
- services: [ 'grafana' ]
-
- - job_name: 'Prometheus'
- consul_sd_configs:
- - server: '{{ env "NOMAD_IP_prometheus" }}:8500'
- services: [ 'prometheus' ]
-
- - job_name: 'Minio'
- bearer_token: eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJleHAiOjQ3NjQ1ODEzMzcsImlzcyI6InByb21ldGhldXMiLCJzdWIiOiJtaW5pbyJ9.oeTw3EIaiFmlDikrHXWiWXMH2vxLfDLkfjEC7G2N3M_keH_xyA_l2ofLLNYtopa_3GCEZnxLQdPuFZrmgpkDWg
- consul_sd_configs:
- - server: '{{ env "NOMAD_IP_prometheus" }}:8500'
- services: [ 'storage' ]
- metrics_path: /minio/prometheus/metrics
-
- - job_name: 'Minio Proxy'
- bearer_token: eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJleHAiOjQ3OTAwNjE1NDIsImlzcyI6InByb21ldGhldXMiLCJzdWIiOiJBS0lBUTJBSDdZUFBXVDZDV1hYSSJ9.CU9x9j-yO0_Uta5iep6yqNiGQPolrr2608E3lpU6Yg21rIv_eOwS5zqzXaSvrhzkJP9H5kO1Pj6kqjYhbqjN_w
- consul_sd_configs:
- - server: '{{ env "NOMAD_IP_prometheus" }}:8500'
- services: [ 'minio' ]
- metrics_path: /minio/v2/metrics/cluster
-EOH
- }
-
- # The service stanza instructs Nomad to register a service with Consul.
- #
- # https://www.nomadproject.io/docs/job-specification/service
- #
- service {
- name = "${service_name}"
- port = "${service_name}"
- tags = [ "${service_name}$${NOMAD_ALLOC_INDEX}" ]
- check {
- name = "Prometheus Check Live"
- type = "http"
- path = "/-/healthy"
- interval = "10s"
- timeout = "2s"
- }
- }
-
- # The "resources" stanza describes the requirements a task needs to
- # execute. Resource requirements include memory, network, cpu, and more.
- # This ensures the task will execute on a machine that contains enough
- # resource capacity.
- #
- # https://www.nomadproject.io/docs/job-specification/resources
- #
- resources {
- cpu = ${cpu}
- memory = ${memory}
- }
- }
- }
-}
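
The template above mixes three layers of interpolation: ${ ... } and %{ if ... } are resolved by Terraform when the file is rendered, $${ ... } is an escape that emits a literal ${ ... } for Nomad's runtime variables, and {{ ... }} is left untouched for consul-template inside the task. A small illustrative sketch of the rendering side (file name, template contents and values are examples only, not part of this repository):

locals {
  # Hypothetical one-variable template used only to illustrate the escaping:
  #   tags = ["${service_name}$${NOMAD_ALLOC_INDEX}"]
  # renders to:
  #   tags = ["prometheus${NOMAD_ALLOC_INDEX}"]
  # while any {{ env "..." }} sequence is passed through verbatim.
  rendered_example = templatefile("${path.module}/example.hcl.tftpl", {
    service_name = "prometheus"
  })
}

output "rendered_example" {
  value = local.rendered_example
}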
diff --git a/fdio.infra.terraform/1n_nmd/prometheus/fdio/main.tf b/fdio.infra.terraform/1n_nmd/prometheus/fdio/main.tf
deleted file mode 100644
index e0ca417a78..0000000000
--- a/fdio.infra.terraform/1n_nmd/prometheus/fdio/main.tf
+++ /dev/null
@@ -1,10 +0,0 @@
-module "prometheus" {
- providers = {
- nomad = nomad.yul1
- }
- source = "../"
-
- # prometheus
- datacenters = ["yul1"]
- pm_version = "2.33.1"
-}
\ No newline at end of file
diff --git a/fdio.infra.terraform/1n_nmd/prometheus/fdio/providers.tf b/fdio.infra.terraform/1n_nmd/prometheus/fdio/providers.tf
deleted file mode 100644
index 42a6a45ce0..0000000000
--- a/fdio.infra.terraform/1n_nmd/prometheus/fdio/providers.tf
+++ /dev/null
@@ -1,13 +0,0 @@
-provider "nomad" {
- address = var.nomad_provider_address
- alias = "yul1"
- # ca_file = var.nomad_provider_ca_file
- # cert_file = var.nomad_provider_cert_file
- # key_file = var.nomad_provider_key_file
-}
-
-provider "vault" {
- address = var.vault_provider_address
- skip_tls_verify = var.vault_provider_skip_tls_verify
- token = var.vault_provider_token
-}
\ No newline at end of file
diff --git a/fdio.infra.terraform/1n_nmd/prometheus/fdio/variables.tf b/fdio.infra.terraform/1n_nmd/prometheus/fdio/variables.tf
deleted file mode 100644
index 7d5be09d21..0000000000
--- a/fdio.infra.terraform/1n_nmd/prometheus/fdio/variables.tf
+++ /dev/null
@@ -1,47 +0,0 @@
-variable "nomad_acl" {
- description = "Nomad ACLs enabled/disabled."
- type = bool
- default = false
-}
-
-variable "nomad_provider_address" {
- description = "FD.io Nomad cluster address."
- type = string
- default = "http://10.32.8.14:4646"
-}
-
-variable "nomad_provider_ca_file" {
- description = "A local file path to a PEM-encoded certificate authority."
- type = string
- default = "/etc/nomad.d/ssl/nomad-ca.pem"
-}
-
-variable "nomad_provider_cert_file" {
- description = "A local file path to a PEM-encoded certificate."
- type = string
- default = "/etc/nomad.d/ssl/nomad-cli.pem"
-}
-
-variable "nomad_provider_key_file" {
- description = "A local file path to a PEM-encoded private key."
- type = string
- default = "/etc/nomad.d/ssl/nomad-cli-key.pem"
-}
-
-variable "vault_provider_address" {
- description = "Vault cluster address."
- type = string
- default = "http://10.30.51.28:8200"
-}
-
-variable "vault_provider_skip_tls_verify" {
- description = "Skip verification of the Vault server's TLS certificate."
- type = bool
- default = false
-}
-
-variable "vault_provider_token" {
- description = "Vault root token."
- type = string
- sensitive = true
-}
\ No newline at end of file
diff --git a/fdio.infra.terraform/1n_nmd/prometheus/fdio/versions.tf b/fdio.infra.terraform/1n_nmd/prometheus/fdio/versions.tf
deleted file mode 100644
index f83709d154..0000000000
--- a/fdio.infra.terraform/1n_nmd/prometheus/fdio/versions.tf
+++ /dev/null
@@ -1,17 +0,0 @@
-terraform {
- backend "consul" {
- address = "10.32.8.14:8500"
- scheme = "http"
- path = "terraform/prometheus"
- }
- required_providers {
- nomad = {
- source = "hashicorp/nomad"
- version = ">= 1.4.16"
- }
- vault = {
- version = ">= 3.2.1"
- }
- }
- required_version = ">= 1.1.4"
-}
\ No newline at end of file
diff --git a/fdio.infra.terraform/1n_nmd/prometheus/main.tf b/fdio.infra.terraform/1n_nmd/prometheus/main.tf
deleted file mode 100644
index 0a4d987831..0000000000
--- a/fdio.infra.terraform/1n_nmd/prometheus/main.tf
+++ /dev/null
@@ -1,42 +0,0 @@
-locals {
- datacenters = join(",", var.datacenters)
- url = join("",
- [
- "https://github.com",
- "/prometheus/prometheus/releases/download/",
- "v${var.pm_version}/",
- "prometheus-${var.pm_version}.linux-amd64.tar.gz"
- ]
- )
-}
-
-resource "nomad_job" "nomad_job_prometheus" {
- jobspec = templatefile(
- "${path.module}/conf/nomad/prometheus.hcl.tftpl",
- {
- auto_promote = var.auto_promote,
- auto_revert = var.auto_revert,
- canary = var.canary,
- cpu = var.cpu,
- datacenters = local.datacenters,
- group_count = var.group_count,
- job_name = var.job_name,
- max_parallel = var.max_parallel,
- memory = var.memory,
- port = var.port,
- region = var.region,
- service_name = var.service_name,
- url = local.url,
- use_canary = var.use_canary,
- use_host_volume = var.use_host_volume,
- use_vault_provider = var.vault_secret.use_vault_provider,
- vault_kv_policy_name = var.vault_secret.vault_kv_policy_name,
- vault_kv_path = var.vault_secret.vault_kv_path,
- vault_kv_field_access_key = var.vault_secret.vault_kv_field_access_key,
- vault_kv_field_secret_key = var.vault_secret.vault_kv_field_secret_key,
- version = var.pm_version,
- volume_destination = var.volume_destination,
- volume_source = var.volume_source
- })
- detach = false
-}
diff --git a/fdio.infra.terraform/1n_nmd/prometheus/variables.tf b/fdio.infra.terraform/1n_nmd/prometheus/variables.tf
deleted file mode 100644
index eab4d3b466..0000000000
--- a/fdio.infra.terraform/1n_nmd/prometheus/variables.tf
+++ /dev/null
@@ -1,127 +0,0 @@
-# Nomad
-variable "datacenters" {
- description = "Specifies the list of datacenters to be considered when placing this task"
- type = list(string)
- default = ["dc1"]
-}
-
-variable "region" {
- description = "Specifies the region in which to execute the job"
- type = string
- default = "global"
-}
-
-variable "volume_source" {
- description = "The name of the volume to request"
- type = string
- default = "prod-volume-data1-1"
-}
-
-# Prometheus
-variable "pm_version" {
- description = "Prometheus version"
- type = string
- default = "2.33.1"
-}
-
-variable "auto_promote" {
- description = "Specifies if the job should auto-promote to the canary version"
- type = bool
- default = true
-}
-
-variable "auto_revert" {
- description = "Specifies if the job should auto-revert to the last stable job"
- type = bool
- default = true
-}
-
-variable "canary" {
- description = "Setting canary equal to the task group count allows blue/green deployment"
- type = number
- default = 1
-}
-
-variable "cpu" {
- description = "CPU allocation"
- type = number
- default = 2000
-}
-
-variable "data_dir" {
- description = "Prometheus data directory path"
- type = string
- default = "/data"
-}
-
-variable "group_count" {
- description = "Specifies the number of instances of the task group to run"
- type = number
- default = 4
-}
-
-variable "job_name" {
- description = "Specifies a name for the job"
- type = string
- default = "prometheus"
-}
-
-variable "max_parallel" {
- description = "Specifies the maximum number of updates to perform in parallel"
- type = number
- default = 1
-}
-
-variable "memory" {
- description = "Specifies the memory required in MB"
- type = number
- default = 4096
-}
-
-variable "port" {
- description = "Specifies the static TCP/UDP port to allocate"
- type = number
- default = 9090
-}
-
-variable "service_name" {
- description = "Specifies the name under which this service will be advertised in Consul"
- type = string
- default = "prometheus"
-}
-
-variable "use_canary" {
- description = "Uses canary deployment"
- type = bool
- default = true
-}
-
-variable "use_host_volume" {
- description = "Use Nomad host volume feature"
- type = bool
- default = true
-}
-
-variable "volume_destination" {
- description = "Specifies where the volume should be mounted inside the task"
- type = string
- default = "/data/"
-}
-
-variable "vault_secret" {
- type = object({
- use_vault_provider = bool,
- vault_kv_policy_name = string,
- vault_kv_path = string,
- vault_kv_field_access_key = string,
- vault_kv_field_secret_key = string
- })
- description = "Set of properties required to fetch secrets from Vault."
- default = {
- use_vault_provider = false
- vault_kv_policy_name = "kv"
- vault_kv_path = "secret/data/prometheus"
- vault_kv_field_access_key = "access_key"
- vault_kv_field_secret_key = "secret_key"
- }
-}
diff --git a/fdio.infra.terraform/1n_nmd/prometheus/versions.tf b/fdio.infra.terraform/1n_nmd/prometheus/versions.tf
deleted file mode 100644
index a01708f28a..0000000000
--- a/fdio.infra.terraform/1n_nmd/prometheus/versions.tf
+++ /dev/null
@@ -1,9 +0,0 @@
-terraform {
- required_providers {
- nomad = {
- source = "hashicorp/nomad"
- version = ">= 1.4.16"
- }
- }
- required_version = ">= 1.1.4"
-}
diff --git a/fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/fdio/main.tf b/fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/fdio/main.tf
deleted file mode 100644
index 4473dafda8..0000000000
--- a/fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/fdio/main.tf
+++ /dev/null
@@ -1,17 +0,0 @@
-module "fdio-logs" {
- # fdio logs iam
- source = "../"
- name = "dynamic-aws-creds-vault-fdio-logs"
-}
-
-module "fdio-docs" {
- # fdio docs iam
- source = "../"
- name = "dynamic-aws-creds-vault-fdio-docs"
-}
-
-module "fdio-csit-jenkins" {
- # fdio csit jenkins iam
- source = "../"
- name = "dynamic-aws-creds-vault-fdio-csit-jenkins"
-}
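
The module sourced above (defined later in this change) declares aws_access_key and aws_secret_key with no defaults, so each of these module blocks also needs those values supplied. A hedged sketch, assuming they are forwarded from root-level variables (for example set via TF_VAR_* environment variables):

variable "aws_access_key" {
  description = "AWS access key forwarded to the secret backend modules"
  type        = string
  sensitive   = true
}

variable "aws_secret_key" {
  description = "AWS secret key forwarded to the secret backend modules"
  type        = string
  sensitive   = true
}

module "fdio-logs" {
  source         = "../"
  name           = "dynamic-aws-creds-vault-fdio-logs"
  aws_access_key = var.aws_access_key
  aws_secret_key = var.aws_secret_key
}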
diff --git a/fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/fdio/providers.tf b/fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/fdio/providers.tf
deleted file mode 100644
index 102fd31b87..0000000000
--- a/fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/fdio/providers.tf
+++ /dev/null
@@ -1,5 +0,0 @@
-provider "vault" {
- address = var.vault_provider_address
- skip_tls_verify = var.vault_provider_skip_tls_verify
- token = var.vault_provider_token
-}
\ No newline at end of file
diff --git a/fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/fdio/variables.tf b/fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/fdio/variables.tf
deleted file mode 100644
index e36ed08473..0000000000
--- a/fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/fdio/variables.tf
+++ /dev/null
@@ -1,17 +0,0 @@
-variable "vault_provider_address" {
- description = "Vault cluster address."
- type = string
- default = "http://10.30.51.28:8200"
-}
-
-variable "vault_provider_skip_tls_verify" {
- description = "Skip verification of the Vault server's TLS certificate"
- type = bool
- default = false
-}
-
-variable "vault_provider_token" {
- description = "Vault root token"
- type = string
- sensitive = true
-}
diff --git a/fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/fdio/versions.tf b/fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/fdio/versions.tf
deleted file mode 100644
index ec03c7c9ee..0000000000
--- a/fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/fdio/versions.tf
+++ /dev/null
@@ -1,13 +0,0 @@
-terraform {
- backend "consul" {
- address = "consul.service.consul:8500"
- scheme = "http"
- path = "fdio/terraform/1n/nomad"
- }
- required_providers {
- vault = {
- version = ">= 3.2.1"
- }
- }
- required_version = ">= 1.1.4"
-}
diff --git a/fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/main.tf b/fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/main.tf
deleted file mode 100644
index a65c390792..0000000000
--- a/fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/main.tf
+++ /dev/null
@@ -1,37 +0,0 @@
-resource "vault_aws_secret_backend" "aws" {
- access_key = var.aws_access_key
- secret_key = var.aws_secret_key
- path = "${var.name}-path"
-
- default_lease_ttl_seconds = "0"
- max_lease_ttl_seconds = "0"
-}
-
-resource "vault_aws_secret_backend_role" "admin" {
- backend = vault_aws_secret_backend.aws.path
- name = "${var.name}-role"
- credential_type = "iam_user"
-
- policy_document = <<EOF
-{
- "Version": "2012-10-17",
- "Statement": [
- {
- "Effect": "Allow",
- "Action": [
- "iam:*", "ec2:*"
- ],
- "Resource": "*"
- }
- ]
-}
-EOF
-}
-
-output "backend" {
- value = vault_aws_secret_backend.aws.path
-}
-
-output "role" {
- value = vault_aws_secret_backend_role.admin.name
-}
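
The backend and role outputs are the handles downstream configurations use to mint short-lived AWS credentials from Vault. A minimal consumer sketch, assuming the Vault provider's vault_aws_access_credentials data source and a module reference named aws_secret_backend (both illustrative):

data "vault_aws_access_credentials" "creds" {
  backend = module.aws_secret_backend.backend # "<name>-path"
  role    = module.aws_secret_backend.role    # "<name>-role"
}

provider "aws" {
  region     = "us-east-1" # illustrative region
  access_key = data.vault_aws_access_credentials.creds.access_key
  secret_key = data.vault_aws_access_credentials.creds.secret_key
}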
diff --git a/fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/variables.tf b/fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/variables.tf
deleted file mode 100644
index 2545345185..0000000000
--- a/fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/variables.tf
+++ /dev/null
@@ -1,17 +0,0 @@
-variable "aws_access_key" {
- description = "AWS access key"
- type = string
- sensitive = true
-}
-
-variable "aws_secret_key" {
- description = "AWS secret key"
- type = string
- sensitive = true
-}
-
-variable "name" {
- default = "dynamic-aws-creds-vault"
- description = "Base name used for the Vault AWS secret backend path and role"
- type = string
-}
diff --git a/fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/versions.tf b/fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/versions.tf
deleted file mode 100644
index 996288568d..0000000000
--- a/fdio.infra.terraform/1n_nmd/vault-aws-secret-backend/versions.tf
+++ /dev/null
@@ -1,8 +0,0 @@
-terraform {
- required_providers {
- vault = {
- version = ">=2.22.1"
- }
- }
- required_version = ">= 1.1.4"
-}