diff options
author | pmikus <pmikus@cisco.com> | 2021-01-07 14:27:38 +0000 |
---|---|---|
committer | pmikus <pmikus@cisco.com> | 2021-01-29 08:44:37 +0000 |
commit | a44eef233de959c5679602dc6cd1ed866d6abc14 (patch) | |
tree | 2c4d0cd5e0d1293ae86a09cf26009b869c72360d /terraform-ci-infra/1n_nmd/exporter/conf/nomad | |
parent | 474163bc78aa418595b227c81056987b1114104d (diff) |
Infra: Monitoring capability
+ Monitoring SOA
+ Nomad alertmanager job
+ Nomad prometheus job
+ Nomad grafana job
Signed-off-by: pmikus <pmikus@cisco.com>
Change-Id: I0b32e9c87276ba1a2d4a5322816f3473c737eae2
Diffstat (limited to 'terraform-ci-infra/1n_nmd/exporter/conf/nomad')
-rw-r--r-- | terraform-ci-infra/1n_nmd/exporter/conf/nomad/exporter.hcl | 587 |
1 files changed, 587 insertions, 0 deletions
diff --git a/terraform-ci-infra/1n_nmd/exporter/conf/nomad/exporter.hcl b/terraform-ci-infra/1n_nmd/exporter/conf/nomad/exporter.hcl new file mode 100644 index 0000000000..4fd0768ae7 --- /dev/null +++ b/terraform-ci-infra/1n_nmd/exporter/conf/nomad/exporter.hcl @@ -0,0 +1,587 @@ +job "${job_name}" { + # The "region" parameter specifies the region in which to execute the job. + # If omitted, this inherits the default region name of "global". + # region = "global" + # + # The "datacenters" parameter specifies the list of datacenters which should + # be considered when placing this task. This must be provided. + datacenters = "${datacenters}" + + # The "type" parameter controls the type of job, which impacts the scheduler's + # decision on placement. This configuration is optional and defaults to + # "service". For a full list of job types and their differences, please see + # the online documentation. + # + # https://www.nomadproject.io/docs/jobspec/schedulers + # + type = "system" + + update { + # The "max_parallel" parameter specifies the maximum number of updates to + # perform in parallel. In this case, this specifies to update a single task + # at a time. + max_parallel = 1 + + health_check = "checks" + + # The "min_healthy_time" parameter specifies the minimum time the allocation + # must be in the healthy state before it is marked as healthy and unblocks + # further allocations from being updated. + min_healthy_time = "10s" + + # The "healthy_deadline" parameter specifies the deadline in which the + # allocation must be marked as healthy after which the allocation is + # automatically transitioned to unhealthy. Transitioning to unhealthy will + # fail the deployment and potentially roll back the job if "auto_revert" is + # set to true. + healthy_deadline = "3m" + + # The "progress_deadline" parameter specifies the deadline in which an + # allocation must be marked as healthy. The deadline begins when the first + # allocation for the deployment is created and is reset whenever an allocation + # as part of the deployment transitions to a healthy state. If no allocation + # transitions to the healthy state before the progress deadline, the + # deployment is marked as failed. + progress_deadline = "10m" + +%{ if use_canary } + # The "canary" parameter specifies that changes to the job that would result + # in destructive updates should create the specified number of canaries + # without stopping any previous allocations. Once the operator determines the + # canaries are healthy, they can be promoted which unblocks a rolling update + # of the remaining allocations at a rate of "max_parallel". + # + # Further, setting "canary" equal to the count of the task group allows + # blue/green deployments. When the job is updated, a full set of the new + # version is deployed and upon promotion the old version is stopped. + canary = 1 + + # Specifies if the job should auto-promote to the canary version when all + # canaries become healthy during a deployment. Defaults to false which means + # canaries must be manually updated with the nomad deployment promote + # command. + auto_promote = true + + # The "auto_revert" parameter specifies if the job should auto-revert to the + # last stable job on deployment failure. A job is marked as stable if all the + # allocations as part of its deployment were marked healthy. + auto_revert = true +%{ endif } + } + + # The "group" stanza defines a series of tasks that should be co-located on + # the same Nomad client. Any task within a group will be placed on the same + # client. + # + # https://www.nomadproject.io/docs/job-specification/group + # + group "prod-group1-exporter-amd64" { + # The constraint allows restricting the set of eligible nodes. Constraints + # may filter on attributes or client metadata. + # + # https://www.nomadproject.io/docs/job-specification/constraint + # + constraint { + attribute = "$${attr.cpu.arch}" + operator = "!=" + value = "arm64" + } + + # The "task" stanza creates an individual unit of work, such as a Docker + # container, web application, or batch processing. + # + # https://www.nomadproject.io/docs/job-specification/task + # + task "prod-task1-${node_service_name}-amd64" { + # The "driver" parameter specifies the task driver that should be used to + # run the task. + driver = "raw_exec" + + # The "config" stanza specifies the driver configuration, which is passed + # directly to the driver to start the task. The details of configurations + # are specific to each driver, so please see specific driver + # documentation for more information. + config { + command = "local/node_exporter-${node_version}.linux-amd64/node_exporter" + } + + # The artifact stanza instructs Nomad to fetch and unpack a remote resource, + # such as a file, tarball, or binary. Nomad downloads artifacts using the + # popular go-getter library, which permits downloading artifacts from a + # variety of locations using a URL as the input source. + # + # https://www.nomadproject.io/docs/job-specification/artifact + # + artifact { + source = "${node_url_amd64}" + } + + # The service stanza instructs Nomad to register a service with Consul. + # + # https://www.nomadproject.io/docs/job-specification/service + # + service { + name = "${node_service_name}" + port = "${node_service_name}" + check { + name = "Node Exporter Check Live" + type = "http" + path = "/metrics" + interval = "10s" + timeout = "2s" + } + } + + # The "resources" stanza describes the requirements a task needs to + # execute. Resource requirements include memory, network, cpu, and more. + # This ensures the task will execute on a machine that contains enough + # resource capacity. + # + # https://www.nomadproject.io/docs/job-specification/resources + # + resources { + cpu = 500 + # The network stanza specifies the networking requirements for the task + # group, including the network mode and port allocations. When scheduling + # jobs in Nomad they are provisioned across your fleet of machines along + # with other jobs and services. Because you don't know in advance what host + # your job will be provisioned on, Nomad will provide your tasks with + # network configuration when they start up. + # + # https://www.nomadproject.io/docs/job-specification/network + # + network { + port "${node_service_name}" { + static = ${node_port} + } + } + } + } + task "prod-task2-${blackbox_service_name}-amd64" { + # The "driver" parameter specifies the task driver that should be used to + # run the task. + driver = "exec" + + # The "config" stanza specifies the driver configuration, which is passed + # directly to the driver to start the task. The details of configurations + # are specific to each driver, so please see specific driver + # documentation for more information. + config { + command = "local/blackbox_exporter-${blackbox_version}.linux-amd64/blackbox_exporter" + args = [ + "--config.file=secrets/blackbox.yml" + ] + } + + # The "template" stanza instructs Nomad to manage a template, such as + # a configuration file or script. This template can optionally pull data + # from Consul or Vault to populate runtime configuration data. + # + # https://www.nomadproject.io/docs/job-specification/template + # + template { + change_mode = "noop" + change_signal = "SIGINT" + destination = "secrets/blackbox.yml" + data = <<EOH +modules: + http_2xx: + prober: http + timeout: 5s + http: + valid_http_versions: ["HTTP/1.1", "HTTP/2.0"] + no_follow_redirects: false + fail_if_ssl: false + fail_if_not_ssl: true + tls_config: + insecure_skip_verify: false + preferred_ip_protocol: "ip4" + icmp_v4: + prober: icmp + timeout: 5s + icmp: + preferred_ip_protocol: "ip4" + dns_udp: + prober: dns + timeout: 5s + dns: + query_name: "jenkins.fd.io" + query_type: "A" + valid_rcodes: + - NOERROR +EOH + } + + # The artifact stanza instructs Nomad to fetch and unpack a remote resource, + # such as a file, tarball, or binary. Nomad downloads artifacts using the + # popular go-getter library, which permits downloading artifacts from a + # variety of locations using a URL as the input source. + # + # https://www.nomadproject.io/docs/job-specification/artifact + # + artifact { + source = "${blackbox_url_amd64}" + } + + # The service stanza instructs Nomad to register a service with Consul. + # + # https://www.nomadproject.io/docs/job-specification/service + # + service { + name = "${blackbox_service_name}" + port = "${blackbox_service_name}" + tags = [ "${blackbox_service_name}$${NOMAD_ALLOC_INDEX}" ] + check { + name = "Blackbox Exporter Check Live" + type = "http" + path = "/metrics" + interval = "10s" + timeout = "2s" + } + } + + # The "resources" stanza describes the requirements a task needs to + # execute. Resource requirements include memory, network, cpu, and more. + # This ensures the task will execute on a machine that contains enough + # resource capacity. + # + # https://www.nomadproject.io/docs/job-specification/resources + # + resources { + cpu = 500 + # The network stanza specifies the networking requirements for the task + # group, including the network mode and port allocations. When scheduling + # jobs in Nomad they are provisioned across your fleet of machines along + # with other jobs and services. Because you don't know in advance what host + # your job will be provisioned on, Nomad will provide your tasks with + # network configuration when they start up. + # + # https://www.nomadproject.io/docs/job-specification/network + # + network { + port "${blackbox_service_name}" { + static = ${blackbox_port} + } + } + } + } + + task "prod-task3-${cadvisor_service_name}-amd64" { + # The "driver" parameter specifies the task driver that should be used to + # run the task. + driver = "docker" + + # The "config" stanza specifies the driver configuration, which is passed + # directly to the driver to start the task. The details of configurations + # are specific to each driver, so please see specific driver + # documentation for more information. + config { + image = "${cadvisor_image}" + volumes = [ + "/:/rootfs:ro", + "/var/run:/var/run:rw", + "/sys:/sys:ro", + "/var/lib/docker/:/var/lib/docker:ro", + "/cgroup:/cgroup:ro" + ] + } + + # The service stanza instructs Nomad to register a service with Consul. + # + # https://www.nomadproject.io/docs/job-specification/service + # + service { + name = "${cadvisor_service_name}" + port = "${cadvisor_service_name}" + check { + name = "cAdvisor Check Live" + type = "http" + path = "/metrics" + interval = "10s" + timeout = "2s" + } + } + + # The "resources" stanza describes the requirements a task needs to + # execute. Resource requirements include memory, network, cpu, and more. + # This ensures the task will execute on a machine that contains enough + # resource capacity. + # + # https://www.nomadproject.io/docs/job-specification/resources + # + resources { + cpu = 500 + # The network stanza specifies the networking requirements for the task + # group, including the network mode and port allocations. When scheduling + # jobs in Nomad they are provisioned across your fleet of machines along + # with other jobs and services. Because you don't know in advance what host + # your job will be provisioned on, Nomad will provide your tasks with + # network configuration when they start up. + # + # https://www.nomadproject.io/docs/job-specification/network + # + network { + port "${cadvisor_service_name}" { + static = ${cadvisor_port} + } + } + } + } + } + + group "prod-group1-exporter-arm64" { + # The constraint allows restricting the set of eligible nodes. Constraints + # may filter on attributes or client metadata. + # + # https://www.nomadproject.io/docs/job-specification/constraint + # + constraint { + attribute = "$${attr.cpu.arch}" + operator = "==" + value = "arm64" + } + + # The "task" stanza creates an individual unit of work, such as a Docker + # container, web application, or batch processing. + # + # https://www.nomadproject.io/docs/job-specification/task + # + task "prod-task1-${node_service_name}-arm64" { + # The "driver" parameter specifies the task driver that should be used to + # run the task. + driver = "raw_exec" + + # The "config" stanza specifies the driver configuration, which is passed + # directly to the driver to start the task. The details of configurations + # are specific to each driver, so please see specific driver + # documentation for more information. + config { + command = "local/node_exporter-${node_version}.linux-arm64/node_exporter" + } + + # The artifact stanza instructs Nomad to fetch and unpack a remote resource, + # such as a file, tarball, or binary. Nomad downloads artifacts using the + # popular go-getter library, which permits downloading artifacts from a + # variety of locations using a URL as the input source. + # + # https://www.nomadproject.io/docs/job-specification/artifact + # + artifact { + source = "${node_url_arm64}" + } + + # The service stanza instructs Nomad to register a service with Consul. + # + # https://www.nomadproject.io/docs/job-specification/service + # + service { + name = "${node_service_name}" + port = "${node_service_name}" + check { + name = "Node Exporter Check Live" + type = "http" + path = "/metrics" + interval = "10s" + timeout = "2s" + } + } + + # The "resources" stanza describes the requirements a task needs to + # execute. Resource requirements include memory, network, cpu, and more. + # This ensures the task will execute on a machine that contains enough + # resource capacity. + # + # https://www.nomadproject.io/docs/job-specification/resources + # + resources { + cpu = 500 + # The network stanza specifies the networking requirements for the task + # group, including the network mode and port allocations. When scheduling + # jobs in Nomad they are provisioned across your fleet of machines along + # with other jobs and services. Because you don't know in advance what host + # your job will be provisioned on, Nomad will provide your tasks with + # network configuration when they start up. + # + # https://www.nomadproject.io/docs/job-specification/network + # + network { + port "${node_service_name}" { + static = ${node_port} + } + } + } + } + + task "prod-task2-${blackbox_service_name}-arm64" { + # The "driver" parameter specifies the task driver that should be used to + # run the task. + driver = "exec" + + # The "config" stanza specifies the driver configuration, which is passed + # directly to the driver to start the task. The details of configurations + # are specific to each driver, so please see specific driver + # documentation for more information. + config { + command = "local/blackbox_exporter-${blackbox_version}.linux-arm64/blackbox_exporter" + args = [ + "--config.file=secrets/blackbox.yml" + ] + } + + # The "template" stanza instructs Nomad to manage a template, such as + # a configuration file or script. This template can optionally pull data + # from Consul or Vault to populate runtime configuration data. + # + # https://www.nomadproject.io/docs/job-specification/template + # + template { + change_mode = "noop" + change_signal = "SIGINT" + destination = "secrets/blackbox.yml" + data = <<EOH +modules: + http_2xx: + prober: http + timeout: 5s + http: + valid_http_versions: ["HTTP/1.1", "HTTP/2.0"] + no_follow_redirects: false + fail_if_ssl: false + fail_if_not_ssl: true + tls_config: + insecure_skip_verify: false + preferred_ip_protocol: "ip4" + icmp_v4: + prober: icmp + timeout: 5s + icmp: + preferred_ip_protocol: "ip4" + dns_udp: + prober: dns + timeout: 5s + dns: + query_name: "jenkins.fd.io" + query_type: "A" + valid_rcodes: + - NOERROR +EOH + } + + # The artifact stanza instructs Nomad to fetch and unpack a remote resource, + # such as a file, tarball, or binary. Nomad downloads artifacts using the + # popular go-getter library, which permits downloading artifacts from a + # variety of locations using a URL as the input source. + # + # https://www.nomadproject.io/docs/job-specification/artifact + # + artifact { + source = "${blackbox_url_arm64}" + } + + # The service stanza instructs Nomad to register a service with Consul. + # + # https://www.nomadproject.io/docs/job-specification/service + # + service { + name = "${blackbox_service_name}" + port = "${blackbox_service_name}" + tags = [ "${blackbox_service_name}$${NOMAD_ALLOC_INDEX}" ] + check { + name = "Blackbox Exporter Check Live" + type = "http" + path = "/metrics" + interval = "10s" + timeout = "2s" + } + } + + # The "resources" stanza describes the requirements a task needs to + # execute. Resource requirements include memory, network, cpu, and more. + # This ensures the task will execute on a machine that contains enough + # resource capacity. + # + # https://www.nomadproject.io/docs/job-specification/resources + # + resources { + cpu = 500 + # The network stanza specifies the networking requirements for the task + # group, including the network mode and port allocations. When scheduling + # jobs in Nomad they are provisioned across your fleet of machines along + # with other jobs and services. Because you don't know in advance what host + # your job will be provisioned on, Nomad will provide your tasks with + # network configuration when they start up. + # + # https://www.nomadproject.io/docs/job-specification/network + # + network { + port "${blackbox_service_name}" { + static = ${blackbox_port} + } + } + } + } + + task "prod-task3-${cadvisor_service_name}-arm64" { + # The "driver" parameter specifies the task driver that should be used to + # run the task. + driver = "docker" + + # The "config" stanza specifies the driver configuration, which is passed + # directly to the driver to start the task. The details of configurations + # are specific to each driver, so please see specific driver + # documentation for more information. + config { + # There is currently no official release for arm yet...using community. + image = "zcube/cadvisor:latest" + volumes = [ + "/:/rootfs:ro", + "/var/run:/var/run:rw", + "/sys:/sys:ro", + "/var/lib/docker/:/var/lib/docker:ro", + "/cgroup:/cgroup:ro" + ] + } + + # The service stanza instructs Nomad to register a service with Consul. + # + # https://www.nomadproject.io/docs/job-specification/service + # + service { + name = "${cadvisor_service_name}" + port = "${cadvisor_service_name}" + check { + name = "cAdvisor Check Live" + type = "http" + path = "/metrics" + interval = "10s" + timeout = "2s" + } + } + + # The "resources" stanza describes the requirements a task needs to + # execute. Resource requirements include memory, network, cpu, and more. + # This ensures the task will execute on a machine that contains enough + # resource capacity. + # + # https://www.nomadproject.io/docs/job-specification/resources + # + resources { + cpu = 500 + # The network stanza specifies the networking requirements for the task + # group, including the network mode and port allocations. When scheduling + # jobs in Nomad they are provisioned across your fleet of machines along + # with other jobs and services. Because you don't know in advance what host + # your job will be provisioned on, Nomad will provide your tasks with + # network configuration when they start up. + # + # https://www.nomadproject.io/docs/job-specification/network + # + network { + port "${cadvisor_service_name}" { + static = ${cadvisor_port} + } + } + } + } + } +}
\ No newline at end of file |