diff options
Diffstat (limited to 'terraform-ci-infra/1n_nmd/exporter')
-rw-r--r-- | terraform-ci-infra/1n_nmd/exporter/conf/nomad/exporter.hcl | 587 | ||||
-rw-r--r-- | terraform-ci-infra/1n_nmd/exporter/main.tf | 64 | ||||
-rw-r--r-- | terraform-ci-infra/1n_nmd/exporter/variables.tf | 76 |
3 files changed, 727 insertions, 0 deletions
diff --git a/terraform-ci-infra/1n_nmd/exporter/conf/nomad/exporter.hcl b/terraform-ci-infra/1n_nmd/exporter/conf/nomad/exporter.hcl
new file mode 100644
index 0000000000..4fd0768ae7
--- /dev/null
+++ b/terraform-ci-infra/1n_nmd/exporter/conf/nomad/exporter.hcl
@@ -0,0 +1,371 @@
+# Nomad job running the Prometheus exporters (node_exporter,
+# blackbox_exporter and cAdvisor) in one amd64 and one arm64 task group.
+#
+# This file is a Terraform template: dollar-brace interpolations and
+# percent-brace directives are resolved by Terraform before the job is
+# submitted, while a doubled dollar sign renders a literal interpolation
+# that Nomad resolves at run time.
+job "${job_name}" {
+  # Datacenters eligible for placement. The template receives the names as
+  # one comma-separated string, so they are split into a real list here; a
+  # single quoted string would be treated as one datacenter name.
+  datacenters = ${jsonencode(split(",", datacenters))}
+
+  # Scheduler type.
+  #
+  # https://www.nomadproject.io/docs/jobspec/schedulers
+  type = "system"
+
+  update {
+    # Maximum number of allocations updated in parallel.
+    max_parallel = 1
+
+    # Allocation health is determined by the service checks.
+    health_check = "checks"
+
+    # Minimum time an allocation must be healthy before it is marked as
+    # healthy and unblocks further updates.
+    min_healthy_time = "10s"
+
+    # Deadline by which an allocation must be marked as healthy; after it
+    # the allocation is transitioned to unhealthy, failing the deployment.
+    healthy_deadline = "3m"
+
+    # Deadline for the deployment to make progress. It starts with the first
+    # allocation and is reset whenever an allocation becomes healthy.
+    progress_deadline = "10m"
+
+%{ if use_canary }
+    # Number of canaries created for a destructive update before any
+    # previous allocation is stopped.
+    canary = 1
+
+    # Promote automatically once all canaries are healthy.
+    auto_promote = true
+
+    # Revert to the last stable job when a deployment fails.
+    auto_revert = true
+%{ endif }
+  }
+
+  # Exporters for every client that is not arm64.
+  #
+  # https://www.nomadproject.io/docs/job-specification/group
+  group "prod-group1-exporter-amd64" {
+    # https://www.nomadproject.io/docs/job-specification/constraint
+    constraint {
+      attribute = "$${attr.cpu.arch}"
+      operator  = "!="
+      value     = "arm64"
+    }
+
+    # node_exporter, run from the unpacked release archive.
+    #
+    # https://www.nomadproject.io/docs/job-specification/task
+    task "prod-task1-${node_service_name}-amd64" {
+      driver = "raw_exec"
+
+      config {
+        command = "local/node_exporter-${node_version}.linux-amd64/node_exporter"
+      }
+
+      # Release archive fetched and unpacked by Nomad.
+      #
+      # https://www.nomadproject.io/docs/job-specification/artifact
+      artifact {
+        source = "${node_url_amd64}"
+      }
+
+      # Consul service with an HTTP check on the metrics endpoint.
+      #
+      # https://www.nomadproject.io/docs/job-specification/service
+      service {
+        name = "${node_service_name}"
+        port = "${node_service_name}"
+        check {
+          name     = "Node Exporter Check Live"
+          type     = "http"
+          path     = "/metrics"
+          interval = "10s"
+          timeout  = "2s"
+        }
+      }
+
+      # https://www.nomadproject.io/docs/job-specification/resources
+      resources {
+        cpu = 500
+        # Static port labelled with the service name.
+        network {
+          port "${node_service_name}" {
+            static = ${node_port}
+          }
+        }
+      }
+    }
+
+    # blackbox_exporter, configured through the rendered blackbox.yml.
+    task "prod-task2-${blackbox_service_name}-amd64" {
+      driver = "exec"
+
+      config {
+        command = "local/blackbox_exporter-${blackbox_version}.linux-amd64/blackbox_exporter"
+        args    = [
+          "--config.file=secrets/blackbox.yml"
+        ]
+      }
+
+      # Probe modules. "change_signal" is left out: it applies only to
+      # change_mode = "signal".
+      #
+      # https://www.nomadproject.io/docs/job-specification/template
+      template {
+        change_mode = "noop"
+        destination = "secrets/blackbox.yml"
+        data        = <<EOH
+modules:
+  http_2xx:
+    prober: http
+    timeout: 5s
+    http:
+      valid_http_versions: ["HTTP/1.1", "HTTP/2.0"]
+      no_follow_redirects: false
+      fail_if_ssl: false
+      fail_if_not_ssl: true
+      tls_config:
+        insecure_skip_verify: false
+      preferred_ip_protocol: "ip4"
+  icmp_v4:
+    prober: icmp
+    timeout: 5s
+    icmp:
+      preferred_ip_protocol: "ip4"
+  dns_udp:
+    prober: dns
+    timeout: 5s
+    dns:
+      query_name: "jenkins.fd.io"
+      query_type: "A"
+      valid_rcodes:
+        - NOERROR
+EOH
+      }
+
+      artifact {
+        source = "${blackbox_url_amd64}"
+      }
+
+      service {
+        name = "${blackbox_service_name}"
+        port = "${blackbox_service_name}"
+        tags = [ "${blackbox_service_name}$${NOMAD_ALLOC_INDEX}" ]
+        check {
+          name     = "Blackbox Exporter Check Live"
+          type     = "http"
+          path     = "/metrics"
+          interval = "10s"
+          timeout  = "2s"
+        }
+      }
+
+      resources {
+        cpu = 500
+        network {
+          port "${blackbox_service_name}" {
+            static = ${blackbox_port}
+          }
+        }
+      }
+    }
+
+    # cAdvisor container with the listed host paths mounted as volumes.
+    task "prod-task3-${cadvisor_service_name}-amd64" {
+      driver = "docker"
+
+      config {
+        image   = "${cadvisor_image}"
+        volumes = [
+          "/:/rootfs:ro",
+          "/var/run:/var/run:rw",
+          "/sys:/sys:ro",
+          "/var/lib/docker/:/var/lib/docker:ro",
+          "/cgroup:/cgroup:ro"
+        ]
+      }
+
+      service {
+        name = "${cadvisor_service_name}"
+        port = "${cadvisor_service_name}"
+        check {
+          name     = "cAdvisor Check Live"
+          type     = "http"
+          path     = "/metrics"
+          interval = "10s"
+          timeout  = "2s"
+        }
+      }
+
+      resources {
+        cpu = 500
+        network {
+          port "${cadvisor_service_name}" {
+            static = ${cadvisor_port}
+          }
+        }
+      }
+    }
+  }
+
+  # Exporters for arm64 clients.
+  group "prod-group1-exporter-arm64" {
+    constraint {
+      attribute = "$${attr.cpu.arch}"
+      operator  = "=="
+      value     = "arm64"
+    }
+
+    # node_exporter, run from the unpacked release archive.
+    task "prod-task1-${node_service_name}-arm64" {
+      driver = "raw_exec"
+
+      config {
+        command = "local/node_exporter-${node_version}.linux-arm64/node_exporter"
+      }
+
+      artifact {
+        source = "${node_url_arm64}"
+      }
+
+      service {
+        name = "${node_service_name}"
+        port = "${node_service_name}"
+        check {
+          name     = "Node Exporter Check Live"
+          type     = "http"
+          path     = "/metrics"
+          interval = "10s"
+          timeout  = "2s"
+        }
+      }
+
+      resources {
+        cpu = 500
+        network {
+          port "${node_service_name}" {
+            static = ${node_port}
+          }
+        }
+      }
+    }
+
+    # blackbox_exporter, configured through the rendered blackbox.yml.
+    task "prod-task2-${blackbox_service_name}-arm64" {
+      driver = "exec"
+
+      config {
+        command = "local/blackbox_exporter-${blackbox_version}.linux-arm64/blackbox_exporter"
+        args    = [
+          "--config.file=secrets/blackbox.yml"
+        ]
+      }
+
+      # Same probe modules as the amd64 task.
+      template {
+        change_mode = "noop"
+        destination = "secrets/blackbox.yml"
+        data        = <<EOH
+modules:
+  http_2xx:
+    prober: http
+    timeout: 5s
+    http:
+      valid_http_versions: ["HTTP/1.1", "HTTP/2.0"]
+      no_follow_redirects: false
+      fail_if_ssl: false
+      fail_if_not_ssl: true
+      tls_config:
+        insecure_skip_verify: false
+      preferred_ip_protocol: "ip4"
+  icmp_v4:
+    prober: icmp
+    timeout: 5s
+    icmp:
+      preferred_ip_protocol: "ip4"
+  dns_udp:
+    prober: dns
+    timeout: 5s
+    dns:
+      query_name: "jenkins.fd.io"
+      query_type: "A"
+      valid_rcodes:
+        - NOERROR
+EOH
+      }
+
+      artifact {
+        source = "${blackbox_url_arm64}"
+      }
+
+      service {
+        name = "${blackbox_service_name}"
+        port = "${blackbox_service_name}"
+        tags = [ "${blackbox_service_name}$${NOMAD_ALLOC_INDEX}" ]
+        check {
+          name     = "Blackbox Exporter Check Live"
+          type     = "http"
+          path     = "/metrics"
+          interval = "10s"
+          timeout  = "2s"
+        }
+      }
+
+      resources {
+        cpu = 500
+        network {
+          port "${blackbox_service_name}" {
+            static = ${blackbox_port}
+          }
+        }
+      }
+    }
+
+    # cAdvisor container with the listed host paths mounted as volumes.
+    task "prod-task3-${cadvisor_service_name}-arm64" {
+      driver = "docker"
+
+      config {
+        # No official arm64 image is published yet, so a community build is
+        # used. NOTE(review): the "latest" tag is not pinned - consider
+        # pinning a fixed version so deployments stay reproducible.
+        image   = "zcube/cadvisor:latest"
+        volumes = [
+          "/:/rootfs:ro",
+          "/var/run:/var/run:rw",
+          "/sys:/sys:ro",
+          "/var/lib/docker/:/var/lib/docker:ro",
+          "/cgroup:/cgroup:ro"
+        ]
+      }
+
+      service {
+        name = "${cadvisor_service_name}"
+        port = "${cadvisor_service_name}"
+        check {
+          name     = "cAdvisor Check Live"
+          type     = "http"
+          path     = "/metrics"
+          interval = "10s"
+          timeout  = "2s"
+        }
+      }
+
+      resources {
+        cpu = 500
+        network {
+          port "${cadvisor_service_name}" {
+            static = ${cadvisor_port}
+          }
+        }
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/terraform-ci-infra/1n_nmd/exporter/main.tf b/terraform-ci-infra/1n_nmd/exporter/main.tf
new file mode 100644
index 0000000000..35eb95b071
--- /dev/null
+++ b/terraform-ci-infra/1n_nmd/exporter/main.tf
@@ -0,0 +1,46 @@
+locals {
+  # Datacenter names joined into one comma-separated string for the template.
+  datacenters = join(",", var.nomad_datacenters)
+
+  # GitHub release download directories for the selected exporter versions.
+  node_release     = "https://github.com/prometheus/node_exporter/releases/download/v${var.node_version}"
+  blackbox_release = "https://github.com/prometheus/blackbox_exporter/releases/download/v${var.blackbox_version}"
+
+  # Release archives per exporter and CPU architecture.
+  node_url_amd64     = "${local.node_release}/node_exporter-${var.node_version}.linux-amd64.tar.gz"
+  node_url_arm64     = "${local.node_release}/node_exporter-${var.node_version}.linux-arm64.tar.gz"
+  blackbox_url_amd64 = "${local.blackbox_release}/blackbox_exporter-${var.blackbox_version}.linux-amd64.tar.gz"
+  blackbox_url_arm64 = "${local.blackbox_release}/blackbox_exporter-${var.blackbox_version}.linux-arm64.tar.gz"
+}
+
+# Renders conf/nomad/exporter.hcl and registers the result as a Nomad job.
+resource "nomad_job" "nomad_job_exporter" {
+  # The built-in templatefile() function is used instead of the
+  # "template_file" data source, which needs the deprecated hashicorp/template
+  # provider.
+  jobspec = templatefile(
+    "${path.module}/conf/nomad/exporter.hcl",
+    {
+      datacenters           = local.datacenters
+      job_name              = var.exporter_job_name
+      use_canary            = var.exporter_use_canary
+      node_url_amd64        = local.node_url_amd64
+      node_url_arm64        = local.node_url_arm64
+      node_version          = var.node_version
+      node_service_name     = var.node_service_name
+      node_port             = var.node_port
+      blackbox_url_amd64    = local.blackbox_url_amd64
+      blackbox_url_arm64    = local.blackbox_url_arm64
+      blackbox_version      = var.blackbox_version
+      blackbox_service_name = var.blackbox_service_name
+      blackbox_port         = var.blackbox_port
+      cadvisor_image        = var.cadvisor_image
+      cadvisor_service_name = var.cadvisor_service_name
+      cadvisor_port         = var.cadvisor_port
+    }
+  )
+
+  # NOTE(review): per the Nomad provider documentation, false makes Terraform
+  # monitor the deployment instead of returning right after submission.
+  detach = false
+}
\ No newline at end of file
diff --git a/terraform-ci-infra/1n_nmd/exporter/variables.tf b/terraform-ci-infra/1n_nmd/exporter/variables.tf
new file mode 100644
index 0000000000..bfa8bd37ac
--- /dev/null
+++ b/terraform-ci-infra/1n_nmd/exporter/variables.tf
@@ -0,0 +1,76 @@
+# Nomad placement
+variable "nomad_datacenters" {
+  type        = list(string)
+  description = "Nomad data centers"
+  default     = ["dc1"]
+}
+
+# Exporter job as a whole
+variable "exporter_job_name" {
+  type        = string
+  description = "Exporter job name"
+  default     = "exporter"
+}
+
+variable "exporter_use_canary" {
+  type        = bool
+  description = "Uses canary deployment"
+  default     = false
+}
+
+# Node exporter: service name, release version and static port
+variable "node_service_name" {
+  type        = string
+  description = "Node exporter service name"
+  default     = "nodeexporter"
+}
+
+variable "node_version" {
+  type        = string
+  description = "Node exporter version"
+  default     = "1.0.1"
+}
+
+variable "node_port" {
+  type        = number
+  description = "Node exporter TCP allocation"
+  default     = 9100
+}
+
+# Blackbox exporter: service name, release version and static port
+variable "blackbox_service_name" {
+  type        = string
+  description = "Blackbox exporter service name"
+  default     = "blackboxexporter"
+}
+
+variable "blackbox_version" {
+  type        = string
+  description = "Blackbox exporter version"
+  default     = "0.18.0"
+}
+
+variable "blackbox_port" {
+  type        = number
+  description = "Blackbox exporter TCP allocation"
+  default     = 9115
+}
+
+# cAdvisor exporter: service name, docker image and static port
+variable "cadvisor_service_name" {
+  type        = string
+  description = "cAdvisor exporter service name"
+  default     = "cadvisorexporter"
+}
+
+variable "cadvisor_image" {
+  type        = string
+  description = "cAdvisor exporter docker image"
+  default     = "gcr.io/cadvisor/cadvisor:v0.38.7"
+}
+
+variable "cadvisor_port" {
+  type        = number
+  description = "cAdvisor exporter TCP allocation"
+  default     = 8080
+}
\ No newline at end of file |