diff options
author | pmikus <pmikus@cisco.com> | 2021-02-05 14:51:43 +0000 |
---|---|---|
committer | Peter Mikus <pmikus@cisco.com> | 2021-02-10 09:04:46 +0000 |
commit | 0017c9d8372ef306ac73aae22bb0d17631c944d2 (patch) | |
tree | d24d4ff9ee33b4a31cdddfba89d2ae9a4b2e0fdd /terraform-ci-infra/1n_nmd/prometheus | |
parent | 60b531215d36e2402b1b6c768bd4fd4d4b210fd0 (diff) |
Infra: JenkinsJobHealthExporter
- Integration of Jenkins Job checker
Signed-off-by: pmikus <pmikus@cisco.com>
Change-Id: I822039cb64a3a352b49314ddab7c6099af3fe644
Diffstat (limited to 'terraform-ci-infra/1n_nmd/prometheus')
-rw-r--r-- | terraform-ci-infra/1n_nmd/prometheus/conf/nomad/prometheus.hcl | 32 |
1 files changed, 32 insertions, 0 deletions
```diff
diff --git a/terraform-ci-infra/1n_nmd/prometheus/conf/nomad/prometheus.hcl b/terraform-ci-infra/1n_nmd/prometheus/conf/nomad/prometheus.hcl
index 4918a5f5bd..d851628fcd 100644
--- a/terraform-ci-infra/1n_nmd/prometheus/conf/nomad/prometheus.hcl
+++ b/terraform-ci-infra/1n_nmd/prometheus/conf/nomad/prometheus.hcl
@@ -188,6 +188,24 @@ job "${job_name}" {
         data = <<EOH
 ---
 groups:
+- name: "Jenkins Job Health Exporter"
+  rules:
+  - alert: JenkinsJobHealthExporterFailures
+    expr: jenkins_job_failure{id=~".*"} >= 10
+    for: 0m
+    labels:
+      severity: critical
+    annotations:
+      summary: "Jenkins Job Health detected high failure rate on jenkins jobs."
+      description: "Job: {{ $labels.id }}"
+  - alert: JenkinsJobHealthExporterUnstable
+    expr: jenkins_job_unstable{id=~".*"} >= 10
+    for: 0m
+    labels:
+      severity: warning
+    annotations:
+      summary: "Jenkins Job Health detected high unstable rate on jenkins jobs."
+      description: "Job: {{ $labels.id }}"
 - name: "Consul"
   rules:
   - alert: ConsulServiceHealthcheckFailed
@@ -523,6 +541,20 @@ scrape_configs:
     - targets: [ '10.32.8.16:8080' ]
     - targets: [ '10.32.8.17:8080' ]
 
+  - job_name: 'Jenkins Job Health Exporter'
+    static_configs:
+    - targets: [ '10.30.51.32:9186' ]
+    metric_relabel_configs:
+    - source_labels: [ __name__ ]
+      regex: '^(vpp.*|csit.*)_(success|failure|total|unstable|reqtime_ms)$'
+      action: replace
+      replacement: '$1'
+      target_label: id
+    - source_labels: [ __name__ ]
+      regex: '^(vpp.*|csit.*)_(success|failure|total|unstable|reqtime_ms)$'
+      replacement: 'jenkins_job_$2'
+      target_label: __name__
+
   - job_name: 'Node Exporter'
     static_configs:
     - targets: [ '10.30.51.28:9100' ]
```