From 8843893ca7531cbb2212a5ed79882909c8374381 Mon Sep 17 00:00:00 2001 From: pmikus Date: Wed, 28 Jul 2021 10:44:43 +0000 Subject: Infra: upgrade monitoring solution + bump versions - remove cadvisor Signed-off-by: pmikus Change-Id: I3de95531f1b09f7254152254e92f225dde653e45 --- .../1n_nmd/prometheus/conf/nomad/prometheus.hcl | 34 +--------------------- .../1n_nmd/prometheus/variables.tf | 2 +- 2 files changed, 2 insertions(+), 34 deletions(-) (limited to 'fdio.infra.terraform/1n_nmd/prometheus') diff --git a/fdio.infra.terraform/1n_nmd/prometheus/conf/nomad/prometheus.hcl b/fdio.infra.terraform/1n_nmd/prometheus/conf/nomad/prometheus.hcl index adc30318c4..3d0b2c2eef 100644 --- a/fdio.infra.terraform/1n_nmd/prometheus/conf/nomad/prometheus.hcl +++ b/fdio.infra.terraform/1n_nmd/prometheus/conf/nomad/prometheus.hcl @@ -175,7 +175,7 @@ job "${job_name}" { args = [ "--config.file=secrets/prometheus.yml", "--storage.tsdb.path=${data_dir}prometheus/", - "--storage.tsdb.retention.time=15d" + "--storage.tsdb.retention.time=7d" ] } @@ -265,14 +265,6 @@ groups: annotations: summary: "Prometheus target missing (instance {{ $labels.instance }})." description: "A Prometheus target has disappeared. An exporter might be crashed." - - alert: HostHighCpuLoad - expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 95 - for: 0m - labels: - severity: warning - annotations: - summary: "Host high CPU load (instance {{ $labels.instance }})." - description: "CPU load is > 95%." - alert: HostOutOfMemory expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10 for: 2m @@ -540,30 +532,6 @@ scrape_configs: replacement: localhost:9115 metrics_path: /probe - - job_name: 'cAdvisor Exporter' - static_configs: - - targets: [ '10.30.51.28:8080' ] - - targets: [ '10.30.51.29:8080' ] - - targets: [ '10.30.51.30:8080' ] - #- targets: [ '10.30.51.32:8080' ] - - targets: [ '10.30.51.33:8080' ] - - targets: [ '10.30.51.34:8080' ] - - targets: [ '10.30.51.35:8080' ] - - targets: [ '10.30.51.39:8080' ] - - targets: [ '10.30.51.40:8080' ] - - targets: [ '10.30.51.50:8080' ] - - targets: [ '10.30.51.51:8080' ] - - targets: [ '10.30.51.65:8080' ] - - targets: [ '10.30.51.66:8080' ] - - targets: [ '10.30.51.67:8080' ] - - targets: [ '10.30.51.68:8080' ] - - targets: [ '10.30.51.70:8080' ] - - targets: [ '10.30.51.71:8080' ] - - targets: [ '10.32.8.14:8080' ] - - targets: [ '10.32.8.15:8080' ] - - targets: [ '10.32.8.16:8080' ] - - targets: [ '10.32.8.17:8080' ] - - job_name: 'Jenkins Job Health Exporter' static_configs: - targets: [ '10.30.51.32:9186' ] diff --git a/fdio.infra.terraform/1n_nmd/prometheus/variables.tf b/fdio.infra.terraform/1n_nmd/prometheus/variables.tf index a509533ccd..55ffa33856 100644 --- a/fdio.infra.terraform/1n_nmd/prometheus/variables.tf +++ b/fdio.infra.terraform/1n_nmd/prometheus/variables.tf @@ -33,7 +33,7 @@ variable "prometheus_service_name" { variable "prometheus_version" { description = "Prometheus version" type = string - default = "v2.24.0" + default = "v2.28.1" } variable "prometheus_use_canary" { -- cgit 1.2.3-korg