diff options
author | pmikus <pmikus@cisco.com> | 2021-07-28 10:44:43 +0000 |
---|---|---|
committer | Peter Mikus <pmikus@cisco.com> | 2021-07-28 11:00:55 +0000 |
commit | 8843893ca7531cbb2212a5ed79882909c8374381 (patch) | |
tree | e99fdf71c923fff39c4b20b8b69263993f4b3972 /fdio.infra.terraform/1n_nmd/prometheus | |
parent | cf63723c2758fb5561bd1810a2725b1e187f9c1f (diff) |
Infra: upgrade monitoring solution
+ bump versions
- remove cadvisor
Signed-off-by: pmikus <pmikus@cisco.com>
Change-Id: I3de95531f1b09f7254152254e92f225dde653e45
Diffstat (limited to 'fdio.infra.terraform/1n_nmd/prometheus')
-rw-r--r-- | fdio.infra.terraform/1n_nmd/prometheus/conf/nomad/prometheus.hcl | 34 | ||||
-rw-r--r-- | fdio.infra.terraform/1n_nmd/prometheus/variables.tf | 2 |
2 files changed, 2 insertions, 34 deletions
diff --git a/fdio.infra.terraform/1n_nmd/prometheus/conf/nomad/prometheus.hcl b/fdio.infra.terraform/1n_nmd/prometheus/conf/nomad/prometheus.hcl index adc30318c4..3d0b2c2eef 100644 --- a/fdio.infra.terraform/1n_nmd/prometheus/conf/nomad/prometheus.hcl +++ b/fdio.infra.terraform/1n_nmd/prometheus/conf/nomad/prometheus.hcl @@ -175,7 +175,7 @@ job "${job_name}" { args = [ "--config.file=secrets/prometheus.yml", "--storage.tsdb.path=${data_dir}prometheus/", - "--storage.tsdb.retention.time=15d" + "--storage.tsdb.retention.time=7d" ] } @@ -265,14 +265,6 @@ groups: annotations: summary: "Prometheus target missing (instance {{ $labels.instance }})." description: "A Prometheus target has disappeared. An exporter might be crashed." - - alert: HostHighCpuLoad - expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 95 - for: 0m - labels: - severity: warning - annotations: - summary: "Host high CPU load (instance {{ $labels.instance }})." - description: "CPU load is > 95%." - alert: HostOutOfMemory expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10 for: 2m @@ -540,30 +532,6 @@ scrape_configs: replacement: localhost:9115 metrics_path: /probe - - job_name: 'cAdvisor Exporter' - static_configs: - - targets: [ '10.30.51.28:8080' ] - - targets: [ '10.30.51.29:8080' ] - - targets: [ '10.30.51.30:8080' ] - #- targets: [ '10.30.51.32:8080' ] - - targets: [ '10.30.51.33:8080' ] - - targets: [ '10.30.51.34:8080' ] - - targets: [ '10.30.51.35:8080' ] - - targets: [ '10.30.51.39:8080' ] - - targets: [ '10.30.51.40:8080' ] - - targets: [ '10.30.51.50:8080' ] - - targets: [ '10.30.51.51:8080' ] - - targets: [ '10.30.51.65:8080' ] - - targets: [ '10.30.51.66:8080' ] - - targets: [ '10.30.51.67:8080' ] - - targets: [ '10.30.51.68:8080' ] - - targets: [ '10.30.51.70:8080' ] - - targets: [ '10.30.51.71:8080' ] - - targets: [ '10.32.8.14:8080' ] - - targets: [ '10.32.8.15:8080' ] - - targets: [ '10.32.8.16:8080' ] - - targets: [ '10.32.8.17:8080' ] - - job_name: 'Jenkins Job Health Exporter' static_configs: - targets: [ '10.30.51.32:9186' ] diff --git a/fdio.infra.terraform/1n_nmd/prometheus/variables.tf b/fdio.infra.terraform/1n_nmd/prometheus/variables.tf index a509533ccd..55ffa33856 100644 --- a/fdio.infra.terraform/1n_nmd/prometheus/variables.tf +++ b/fdio.infra.terraform/1n_nmd/prometheus/variables.tf @@ -33,7 +33,7 @@ variable "prometheus_service_name" { variable "prometheus_version" { description = "Prometheus version" type = string - default = "v2.24.0" + default = "v2.28.1" } variable "prometheus_use_canary" { |