about | summary | refs | log | tree | commit | diff | stats
path: root/fdio.infra.terraform/1n_nmd/prometheus
diff options
context:
space:
mode:
author pmikus <pmikus@cisco.com> 2021-07-28 10:44:43 +0000
committer Peter Mikus <pmikus@cisco.com> 2021-07-28 11:00:55 +0000
commit 8843893ca7531cbb2212a5ed79882909c8374381 (patch)
tree e99fdf71c923fff39c4b20b8b69263993f4b3972 /fdio.infra.terraform/1n_nmd/prometheus
parent cf63723c2758fb5561bd1810a2725b1e187f9c1f (diff)
Infra: upgrade monitoring solution
+ bump versions
- remove cadvisor

Signed-off-by: pmikus <pmikus@cisco.com>
Change-Id: I3de95531f1b09f7254152254e92f225dde653e45
Diffstat (limited to 'fdio.infra.terraform/1n_nmd/prometheus')
-rw-r--r-- fdio.infra.terraform/1n_nmd/prometheus/conf/nomad/prometheus.hcl 34
-rw-r--r-- fdio.infra.terraform/1n_nmd/prometheus/variables.tf 2
2 files changed, 2 insertions, 34 deletions
diff --git a/fdio.infra.terraform/1n_nmd/prometheus/conf/nomad/prometheus.hcl b/fdio.infra.terraform/1n_nmd/prometheus/conf/nomad/prometheus.hcl
index adc30318c4..3d0b2c2eef 100644
--- a/fdio.infra.terraform/1n_nmd/prometheus/conf/nomad/prometheus.hcl
+++ b/fdio.infra.terraform/1n_nmd/prometheus/conf/nomad/prometheus.hcl
@@ -175,7 +175,7 @@ job "${job_name}" {
args = [
"--config.file=secrets/prometheus.yml",
"--storage.tsdb.path=${data_dir}prometheus/",
- "--storage.tsdb.retention.time=15d"
+ "--storage.tsdb.retention.time=7d"
]
}
@@ -265,14 +265,6 @@ groups:
annotations:
summary: "Prometheus target missing (instance {{ $labels.instance }})."
description: "A Prometheus target has disappeared. An exporter might be crashed."
- - alert: HostHighCpuLoad
- expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 95
- for: 0m
- labels:
- severity: warning
- annotations:
- summary: "Host high CPU load (instance {{ $labels.instance }})."
- description: "CPU load is > 95%."
- alert: HostOutOfMemory
expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
for: 2m
@@ -540,30 +532,6 @@ scrape_configs:
replacement: localhost:9115
metrics_path: /probe
- - job_name: 'cAdvisor Exporter'
- static_configs:
- - targets: [ '10.30.51.28:8080' ]
- - targets: [ '10.30.51.29:8080' ]
- - targets: [ '10.30.51.30:8080' ]
- #- targets: [ '10.30.51.32:8080' ]
- - targets: [ '10.30.51.33:8080' ]
- - targets: [ '10.30.51.34:8080' ]
- - targets: [ '10.30.51.35:8080' ]
- - targets: [ '10.30.51.39:8080' ]
- - targets: [ '10.30.51.40:8080' ]
- - targets: [ '10.30.51.50:8080' ]
- - targets: [ '10.30.51.51:8080' ]
- - targets: [ '10.30.51.65:8080' ]
- - targets: [ '10.30.51.66:8080' ]
- - targets: [ '10.30.51.67:8080' ]
- - targets: [ '10.30.51.68:8080' ]
- - targets: [ '10.30.51.70:8080' ]
- - targets: [ '10.30.51.71:8080' ]
- - targets: [ '10.32.8.14:8080' ]
- - targets: [ '10.32.8.15:8080' ]
- - targets: [ '10.32.8.16:8080' ]
- - targets: [ '10.32.8.17:8080' ]
-
- job_name: 'Jenkins Job Health Exporter'
static_configs:
- targets: [ '10.30.51.32:9186' ]
diff --git a/fdio.infra.terraform/1n_nmd/prometheus/variables.tf b/fdio.infra.terraform/1n_nmd/prometheus/variables.tf
index a509533ccd..55ffa33856 100644
--- a/fdio.infra.terraform/1n_nmd/prometheus/variables.tf
+++ b/fdio.infra.terraform/1n_nmd/prometheus/variables.tf
@@ -33,7 +33,7 @@ variable "prometheus_service_name" {
variable "prometheus_version" {
description = "Prometheus version"
type = string
- default = "v2.24.0"
+ default = "v2.28.1"
}
variable "prometheus_use_canary" {