aboutsummaryrefslogtreecommitdiffstats
path: root/fdio.infra.terraform/1n_nmd/alertmanager
diff options
context:
space:
mode:
author Peter Mikus <pmikus@cisco.com> 2022-02-09 09:58:09 +0100
committer Peter Mikus <pmikus@cisco.com> 2022-02-09 08:59:07 +0000
commit 0bbb81c4fd1afdee6eb23ba4d49171d8dced6b19 (patch)
tree cf806d4fdcad01bcf8115f6784847f0fdc765363 /fdio.infra.terraform/1n_nmd/alertmanager
parent 0576c4293e05b1eded486b48a217495451a8b685 (diff)
feat(terraform): Refactor Alertmanager
- prepare for ETL

Signed-off-by: Peter Mikus <pmikus@cisco.com>
Change-Id: I8931f76f78b5acee39716398b92e4b107d399773
Diffstat (limited to 'fdio.infra.terraform/1n_nmd/alertmanager')
-rw-r--r-- fdio.infra.terraform/1n_nmd/alertmanager/conf/nomad/alertmanager.hcl.tftpl (renamed from fdio.infra.terraform/1n_nmd/alertmanager/conf/nomad/alertmanager.hcl) | 163
-rw-r--r-- fdio.infra.terraform/1n_nmd/alertmanager/fdio/main.tf | 14
-rw-r--r-- fdio.infra.terraform/1n_nmd/alertmanager/fdio/providers.tf | 13
-rw-r--r-- fdio.infra.terraform/1n_nmd/alertmanager/fdio/variables.tf | 47
-rw-r--r-- fdio.infra.terraform/1n_nmd/alertmanager/fdio/versions.tf | 17
-rw-r--r-- fdio.infra.terraform/1n_nmd/alertmanager/main.tf | 70
-rw-r--r-- fdio.infra.terraform/1n_nmd/alertmanager/variables.tf | 131
-rw-r--r-- fdio.infra.terraform/1n_nmd/alertmanager/versions.tf | 10
8 files changed, 305 insertions, 160 deletions
diff --git a/fdio.infra.terraform/1n_nmd/alertmanager/conf/nomad/alertmanager.hcl b/fdio.infra.terraform/1n_nmd/alertmanager/conf/nomad/alertmanager.hcl.tftpl
index ab92761ac2..d1bb8e85cd 100644
--- a/fdio.infra.terraform/1n_nmd/alertmanager/conf/nomad/alertmanager.hcl
+++ b/fdio.infra.terraform/1n_nmd/alertmanager/conf/nomad/alertmanager.hcl.tftpl
@@ -1,28 +1,26 @@
job "${job_name}" {
# The "region" parameter specifies the region in which to execute the job.
# If omitted, this inherits the default region name of "global".
- # region = "global"
- #
+ # region = "${region}"
+
# The "datacenters" parameter specifies the list of datacenters which should
# be considered when placing this task. This must be provided.
- datacenters = "${datacenters}"
+ datacenters = "${datacenters}"
# The "type" parameter controls the type of job, which impacts the scheduler's
# decision on placement. This configuration is optional and defaults to
# "service". For a full list of job types and their differences, please see
# the online documentation.
#
- # For more information, please see the online documentation at:
- #
# https://www.nomadproject.io/docs/jobspec/schedulers
#
- type = "service"
+ type = "service"
update {
# The "max_parallel" parameter specifies the maximum number of updates to
# perform in parallel. In this case, this specifies to update a single task
# at a time.
- max_parallel = 1
+ max_parallel = ${max_parallel}
health_check = "checks"
@@ -56,46 +54,51 @@ job "${job_name}" {
# Further, setting "canary" equal to the count of the task group allows
# blue/green deployments. When the job is updated, a full set of the new
# version is deployed and upon promotion the old version is stopped.
- canary = 1
+ canary = ${canary}
# Specifies if the job should auto-promote to the canary version when all
# canaries become healthy during a deployment. Defaults to false which means
# canaries must be manually updated with the nomad deployment promote
# command.
- auto_promote = true
+ auto_promote = ${auto_promote}
# The "auto_revert" parameter specifies if the job should auto-revert to the
# last stable job on deployment failure. A job is marked as stable if all the
# allocations as part of its deployment were marked healthy.
- auto_revert = true
+ auto_revert = ${auto_revert}
%{ endif }
}
- # The reschedule stanza specifies the group's rescheduling strategy. If
- # specified at the job level, the configuration will apply to all groups
- # within the job. If the reschedule stanza is present on both the job and the
- # group, they are merged with the group stanza taking the highest precedence
- # and then the job.
- reschedule {
- delay = "30s"
- delay_function = "constant"
- unlimited = true
+ # All groups in this job should be scheduled on different hosts.
+ constraint {
+ operator = "distinct_hosts"
+ value = "true"
}
# The "group" stanza defines a series of tasks that should be co-located on
# the same Nomad client. Any task within a group will be placed on the same
# client.
#
- # For more information and examples on the "group" stanza, please see
- # the online documentation at:
- #
# https://www.nomadproject.io/docs/job-specification/group
#
- group "prod-group1-${service_name}" {
+ group "${job_name}-group-1" {
# The "count" parameter specifies the number of the task groups that should
# be running under this group. This value must be non-negative and defaults
# to 1.
- count = ${group_count}
+ count = ${group_count}
+
+ # The volume stanza allows the group to specify that it requires a given
+ # volume from the cluster. The key of the stanza is the name of the volume
+ # as it will be exposed to task configuration.
+ #
+ # https://www.nomadproject.io/docs/job-specification/volume
+ %{ if use_host_volume }
+ volume "${job_name}-volume-1" {
+ type = "host"
+ read_only = false
+ source = "${volume_source}"
+ }
+ %{ endif }
# The restart stanza configures a task's behavior on task failure. Restarts
# happen on the client that is running the task.
@@ -103,57 +106,75 @@ job "${job_name}" {
# https://www.nomadproject.io/docs/job-specification/restart
#
restart {
- interval = "30m"
- attempts = 40
- delay = "15s"
- mode = "delay"
+ interval = "30m"
+ attempts = 40
+ delay = "15s"
+ mode = "delay"
}
# The constraint allows restricting the set of eligible nodes. Constraints
# may filter on attributes or client metadata.
#
- # For more information and examples on the "volume" stanza, please see
- # the online documentation at:
- #
# https://www.nomadproject.io/docs/job-specification/constraint
#
constraint {
- attribute = "$${attr.cpu.arch}"
- operator = "!="
- value = "arm64"
+ attribute = "$${attr.cpu.arch}"
+ operator = "!="
+ value = "arm64"
}
constraint {
- attribute = "$${node.class}"
- value = "builder"
+ attribute = "$${node.class}"
+ value = "builder"
+ }
+
+ # The network stanza specifies the networking requirements for the task
+ # group, including the network mode and port allocations. When scheduling
+ # jobs in Nomad they are provisioned across your fleet of machines along
+ # with other jobs and services. Because you don't know in advance what host
+ # your job will be provisioned on, Nomad will provide your tasks with
+ # network configuration when they start up.
+ #
+ # https://www.nomadproject.io/docs/job-specification/network
+ #
+ network {
+ port "${service_name}" {
+ static = ${port}
+ to = ${port}
+ }
}
# The "task" stanza creates an individual unit of work, such as a Docker
# container, web application, or batch processing.
#
- # For more information and examples on the "task" stanza, please see
- # the online documentation at:
- #
# https://www.nomadproject.io/docs/job-specification/task
#
- task "prod-task1-${service_name}" {
+ task "${job_name}-task-1" {
# The "driver" parameter specifies the task driver that should be used to
# run the task.
- driver = "exec"
+ driver = "exec"
+
+ %{ if use_host_volume }
+ volume_mount {
+ volume = "${job_name}-volume-1"
+ destination = "${volume_destination}"
+ read_only = false
+ }
+ %{ endif }
- %{ if use_vault_provider }
+ %{ if use_vault_provider }
vault {
- policies = "${vault_kv_policy_name}"
+ policies = "${vault_kv_policy_name}"
}
- %{ endif }
+ %{ endif }
# The "config" stanza specifies the driver configuration, which is passed
# directly to the driver to start the task. The details of configurations
# are specific to each driver, so please see specific driver
# documentation for more information.
config {
- command = "local/alertmanager-${version}.linux-amd64/alertmanager"
- args = [
+ command = "local/alertmanager-${version}.linux-amd64/alertmanager"
+ args = [
"--config.file=secrets/alertmanager.yml"
]
}
@@ -163,22 +184,16 @@ job "${job_name}" {
# popular go-getter library, which permits downloading artifacts from a
# variety of locations using a URL as the input source.
#
- # For more information and examples on the "artifact" stanza, please see
- # the online documentation at:
- #
# https://www.nomadproject.io/docs/job-specification/artifact
#
artifact {
- source = "${url}"
+ source = "${url}"
}
# The "template" stanza instructs Nomad to manage a template, such as
# a configuration file or script. This template can optionally pull data
# from Consul or Vault to populate runtime configuration data.
#
- # For more information and examples on the "template" stanza, please see
- # the online documentation at:
- #
# https://www.nomadproject.io/docs/job-specification/template
#
template {
@@ -337,15 +352,15 @@ EOH
# https://www.nomadproject.io/docs/job-specification/service
#
service {
- name = "${service_name}"
- port = "${service_name}"
- tags = [ "${service_name}$${NOMAD_ALLOC_INDEX}" ]
+ name = "${service_name}"
+ port = "${service_name}"
+ tags = [ "${service_name}$${NOMAD_ALLOC_INDEX}" ]
check {
- name = "Alertmanager Check Live"
- type = "http"
- path = "/-/healthy"
- interval = "10s"
- timeout = "2s"
+ name = "Alertmanager Check Live"
+ type = "http"
+ path = "/-/healthy"
+ interval = "10s"
+ timeout = "2s"
}
}
@@ -354,32 +369,12 @@ EOH
# This ensures the task will execute on a machine that contains enough
# resource capacity.
#
- # For more information and examples on the "resources" stanza, please see
- # the online documentation at:
- #
# https://www.nomadproject.io/docs/job-specification/resources
#
resources {
- cpu = ${cpu}
- memory = ${mem}
- # The network stanza specifies the networking requirements for the task
- # group, including the network mode and port allocations. When scheduling
- # jobs in Nomad they are provisioned across your fleet of machines along
- # with other jobs and services. Because you don't know in advance what host
- # your job will be provisioned on, Nomad will provide your tasks with
- # network configuration when they start up.
- #
- # For more information and examples on the "template" stanza, please see
- # the online documentation at:
- #
- # https://www.nomadproject.io/docs/job-specification/network
- #
- network {
- port "${service_name}" {
- static = ${port}
- }
- }
+ cpu = ${cpu}
+ memory = ${memory}
}
}
}
-} \ No newline at end of file
+}
diff --git a/fdio.infra.terraform/1n_nmd/alertmanager/fdio/main.tf b/fdio.infra.terraform/1n_nmd/alertmanager/fdio/main.tf
new file mode 100644
index 0000000000..745e450a8c
--- /dev/null
+++ b/fdio.infra.terraform/1n_nmd/alertmanager/fdio/main.tf
@@ -0,0 +1,14 @@
+module "alertmanager" {
+ providers = {
+ nomad = nomad.yul1
+ }
+ source = "../"
+
+ # alertmanager
+ datacenters = ["yul1"]
+ slack_jenkins_api_key = "TE07RD1V1/B01U1NV9HV3/hKZXJJ74g2JcISq4K3QC1eG9"
+ slack_jenkins_channel = "fdio-jobs-monitoring"
+ slack_default_api_key = "TE07RD1V1/B01UUK23B6C/hZTcCu42FUv8d6rtirHtcYIi"
+ slack_default_channel = "fdio-infra-monitoring"
+ am_version = "0.23.0"
+} \ No newline at end of file
diff --git a/fdio.infra.terraform/1n_nmd/alertmanager/fdio/providers.tf b/fdio.infra.terraform/1n_nmd/alertmanager/fdio/providers.tf
new file mode 100644
index 0000000000..42a6a45ce0
--- /dev/null
+++ b/fdio.infra.terraform/1n_nmd/alertmanager/fdio/providers.tf
@@ -0,0 +1,13 @@
+provider "nomad" {
+ address = var.nomad_provider_address
+ alias = "yul1"
+ # ca_file = var.nomad_provider_ca_file
+ # cert_file = var.nomad_provider_cert_file
+ # key_file = var.nomad_provider_key_file
+}
+
+provider "vault" {
+ address = var.vault_provider_address
+ skip_tls_verify = var.vault_provider_skip_tls_verify
+ token = var.vault_provider_token
+} \ No newline at end of file
diff --git a/fdio.infra.terraform/1n_nmd/alertmanager/fdio/variables.tf b/fdio.infra.terraform/1n_nmd/alertmanager/fdio/variables.tf
new file mode 100644
index 0000000000..7d5be09d21
--- /dev/null
+++ b/fdio.infra.terraform/1n_nmd/alertmanager/fdio/variables.tf
@@ -0,0 +1,47 @@
+variable "nomad_acl" {
+ description = "Nomad ACLs enabled/disabled."
+ type = bool
+ default = false
+}
+
+variable "nomad_provider_address" {
+ description = "FD.io Nomad cluster address."
+ type = string
+ default = "http://10.32.8.14:4646"
+}
+
+variable "nomad_provider_ca_file" {
+ description = "A local file path to a PEM-encoded certificate authority."
+ type = string
+ default = "/etc/nomad.d/ssl/nomad-ca.pem"
+}
+
+variable "nomad_provider_cert_file" {
+ description = "A local file path to a PEM-encoded certificate."
+ type = string
+ default = "/etc/nomad.d/ssl/nomad-cli.pem"
+}
+
+variable "nomad_provider_key_file" {
+ description = "A local file path to a PEM-encoded private key."
+ type = string
+ default = "/etc/nomad.d/ssl/nomad-cli-key.pem"
+}
+
+variable "vault_provider_address" {
+ description = "Vault cluster address."
+ type = string
+ default = "http://10.30.51.28:8200"
+}
+
+variable "vault_provider_skip_tls_verify" {
+ description = "Skip verification of the Vault server's TLS certificate."
+ type = bool
+ default = false
+}
+
+variable "vault_provider_token" {
+ description = "Vault root token."
+ type = string
+ sensitive = true
+} \ No newline at end of file
diff --git a/fdio.infra.terraform/1n_nmd/alertmanager/fdio/versions.tf b/fdio.infra.terraform/1n_nmd/alertmanager/fdio/versions.tf
new file mode 100644
index 0000000000..385c5c3f18
--- /dev/null
+++ b/fdio.infra.terraform/1n_nmd/alertmanager/fdio/versions.tf
@@ -0,0 +1,17 @@
+terraform {
+ backend "consul" {
+ address = "10.32.8.14:8500"
+ scheme = "http"
+ path = "terraform/alertmanager"
+ }
+ required_providers {
+ nomad = {
+ source = "hashicorp/nomad"
+ version = ">= 1.4.16"
+ }
+ vault = {
+ version = ">= 3.2.1"
+ }
+ }
+ required_version = ">= 1.1.4"
+} \ No newline at end of file
diff --git a/fdio.infra.terraform/1n_nmd/alertmanager/main.tf b/fdio.infra.terraform/1n_nmd/alertmanager/main.tf
index b7ab5dce92..e8a1389150 100644
--- a/fdio.infra.terraform/1n_nmd/alertmanager/main.tf
+++ b/fdio.infra.terraform/1n_nmd/alertmanager/main.tf
@@ -1,40 +1,48 @@
locals {
- datacenters = join(",", var.nomad_datacenters)
-
- alertmanager_url = join("",
+ datacenters = join(",", var.datacenters)
+ url = join("",
[
"https://github.com",
"/prometheus/alertmanager/releases/download/",
- "v${var.alertmanager_version}/",
- "alertmanager-${var.alertmanager_version}.linux-amd64.tar.gz"
+ "v${var.am_version}/",
+ "alertmanager-${var.am_version}.linux-amd64.tar.gz"
]
)
}
-data "template_file" "nomad_job_alertmanager" {
- template = file("${path.module}/conf/nomad/alertmanager.hcl")
- vars = {
- datacenters = local.datacenters
- url = local.alertmanager_url
- job_name = var.alertmanager_job_name
- use_canary = var.alertmanager_use_canary
- group_count = var.alertmanager_group_count
- service_name = var.alertmanager_service_name
- use_vault_provider = var.alertmanager_vault_secret.use_vault_provider
- version = var.alertmanager_version
- cpu = var.alertmanager_cpu
- mem = var.alertmanager_mem
- port = var.alertmanager_port
- slack_jenkins_api_key = var.alertmanager_slack_jenkins_api_key
- slack_jenkins_channel = var.alertmanager_slack_jenkins_channel
- slack_jenkins_receiver = var.alertmanager_slack_jenkins_receiver
- slack_default_api_key = var.alertmanager_slack_default_api_key
- slack_default_channel = var.alertmanager_slack_default_channel
- slack_default_receiver = var.alertmanager_slack_default_receiver
- }
-}
-
resource "nomad_job" "nomad_job_alertmanager" {
- jobspec = data.template_file.nomad_job_alertmanager.rendered
- detach = false
-} \ No newline at end of file
+ jobspec = templatefile(
+ "${path.module}/conf/nomad/alertmanager.hcl.tftpl",
+ {
+ auto_promote = var.auto_promote,
+ auto_revert = var.auto_revert,
+ canary = var.canary,
+ cpu = var.cpu,
+ datacenters = local.datacenters,
+ group_count = var.group_count,
+ job_name = var.job_name,
+ max_parallel = var.max_parallel,
+ memory = var.memory
+ port = var.port,
+ region = var.region,
+ service_name = var.service_name,
+ slack_jenkins_api_key = var.slack_jenkins_api_key,
+ slack_jenkins_channel = var.slack_jenkins_channel,
+ slack_jenkins_receiver = var.slack_jenkins_receiver,
+ slack_default_api_key = var.slack_default_api_key,
+ slack_default_channel = var.slack_default_channel,
+ slack_default_receiver = var.slack_default_receiver,
+ url = local.url,
+ use_canary = var.use_canary,
+ use_host_volume = var.use_host_volume,
+ use_vault_provider = var.vault_secret.use_vault_provider,
+ vault_kv_policy_name = var.vault_secret.vault_kv_policy_name,
+ vault_kv_path = var.vault_secret.vault_kv_path,
+ vault_kv_field_access_key = var.vault_secret.vault_kv_field_access_key,
+ vault_kv_field_secret_key = var.vault_secret.vault_kv_field_secret_key,
+ version = var.am_version,
+ volume_destination = var.volume_destination,
+ volume_source = var.volume_source
+ })
+ detach = false
+}
diff --git a/fdio.infra.terraform/1n_nmd/alertmanager/variables.tf b/fdio.infra.terraform/1n_nmd/alertmanager/variables.tf
index e24ceb64c6..e452598fa6 100644
--- a/fdio.infra.terraform/1n_nmd/alertmanager/variables.tf
+++ b/fdio.infra.terraform/1n_nmd/alertmanager/variables.tf
@@ -1,43 +1,102 @@
# Nomad
-variable "nomad_datacenters" {
- description = "Nomad data centers"
+variable "datacenters" {
+ description = "Specifies the list of DCs to be considered placing this task"
type = list(string)
default = ["dc1"]
}
-# Alermanager
-variable "alertmanager_job_name" {
- description = "Job name"
+variable "region" {
+ description = "Specifies the region in which to execute the job"
type = string
- default = "alertmanager"
+ default = "global"
+}
+
+variable "volume_source" {
+ description = "The name of the volume to request"
+ type = string
+ default = "persistence"
+}
+
+# Alertmanager
+variable "am_version" {
+ description = "Alertmanager version"
+ type = string
+ default = "0.21.0"
+}
+
+variable "auto_promote" {
+ description = "Specifies if the job should auto-promote to the canary version"
+ type = bool
+ default = true
+}
+
+variable "auto_revert" {
+ description = "Specifies if the job should auto-revert to the last stable job"
+ type = bool
+ default = true
}
-variable "alertmanager_group_count" {
- description = "Number of group instances"
+variable "canary" {
+ description = "Equal to the count of the task group allows blue/green depl."
type = number
default = 1
}
-variable "alertmanager_service_name" {
- description = "Service name"
+variable "cpu" {
+ description = "CPU allocation"
+ type = number
+ default = 1000
+}
+
+variable "group_count" {
+ description = "Specifies the number of the task groups running under this one"
+ type = number
+ default = 1
+}
+
+variable "job_name" {
+ description = "Specifies a name for the job"
type = string
default = "alertmanager"
}
-variable "alertmanager_version" {
- description = "Version"
+variable "max_parallel" {
+ description = "Specifies the maximum number of updates to perform in parallel"
+ type = number
+ default = 1
+}
+
+variable "memory" {
+ description = "Specifies the memory required in MB"
+ type = number
+ default = 1024
+}
+
+variable "port" {
+ description = "Specifies the static TCP/UDP port to allocate"
+ type = number
+ default = 9093
+}
+
+variable "service_name" {
+ description = "Specifies the name this service will be advertised in Consul"
type = string
- default = "0.21.0"
+ default = "alertmanager"
}
-variable "alertmanager_use_canary" {
+variable "use_canary" {
description = "Uses canary deployment"
type = bool
+ default = true
+}
+
+variable "use_host_volume" {
+ description = "Use Nomad host volume feature"
+ type = bool
default = false
}
-variable "alertmanager_vault_secret" {
- description = "Set of properties to be able to fetch secret from vault"
+variable "vault_secret" {
type = object({
use_vault_provider = bool,
vault_kv_policy_name = string,
@@ -45,57 +104,53 @@ variable "alertmanager_vault_secret" {
vault_kv_field_access_key = string,
vault_kv_field_secret_key = string
})
+ description = "Set of properties to be able to fetch secret from vault."
+ default = {
+ use_vault_provider = false
+ vault_kv_policy_name = "kv"
+ vault_kv_path = "secret/data/alertmanager"
+ vault_kv_field_access_key = "access_key"
+ vault_kv_field_secret_key = "secret_key"
+ }
}
-variable "alertmanager_cpu" {
- description = "CPU allocation"
- type = number
- default = 1000
-}
-
-variable "alertmanager_mem" {
- description = "RAM allocation"
- type = number
- default = 1024
-}
-
-variable "alertmanager_port" {
- description = "TCP allocation"
- type = number
- default = 9093
+variable "volume_destination" {
+ description = "Specifies where the volume should be mounted inside the task"
+ type = string
+ default = "/data/"
}
-variable "alertmanager_slack_jenkins_api_key" {
+variable "slack_jenkins_api_key" {
description = "Alertmanager jenkins slack API key"
type = string
default = "XXXXXXXXX/XXXXXXXXXXX/XXXXXXXXXXXXXXXXXXXXXXXX"
}
-variable "alertmanager_slack_jenkins_receiver" {
+variable "slack_jenkins_receiver" {
description = "Alertmanager jenkins slack receiver"
type = string
default = "jenkins-slack-receiver"
}
-variable "alertmanager_slack_jenkins_channel" {
+variable "slack_jenkins_channel" {
description = "Alertmanager jenkins slack channel"
type = string
default = "jenkins-channel"
}
-variable "alertmanager_slack_default_api_key" {
+variable "slack_default_api_key" {
description = "Alertmanager default slack API key"
type = string
default = "XXXXXXXXX/XXXXXXXXXXX/XXXXXXXXXXXXXXXXXXXXXXXX"
}
-variable "alertmanager_slack_default_receiver" {
+variable "slack_default_receiver" {
description = "Alertmanager default slack receiver"
type = string
default = "default-slack-receiver"
}
-variable "alertmanager_slack_default_channel" {
+variable "slack_default_channel" {
description = "Alertmanager default slack channel"
type = string
default = "default-channel"
diff --git a/fdio.infra.terraform/1n_nmd/alertmanager/versions.tf b/fdio.infra.terraform/1n_nmd/alertmanager/versions.tf
index b80610a525..5f283ed4ea 100644
--- a/fdio.infra.terraform/1n_nmd/alertmanager/versions.tf
+++ b/fdio.infra.terraform/1n_nmd/alertmanager/versions.tf
@@ -2,12 +2,8 @@ terraform {
required_providers {
nomad = {
source = "hashicorp/nomad"
- version = "~> 1.4.15"
- }
- template = {
- source = "hashicorp/template"
- version = "~> 2.2.0"
+ version = ">= 1.4.16"
}
}
- required_version = ">= 1.0.3"
-}
+ required_version = ">= 1.1.4"
+} \ No newline at end of file