aboutsummaryrefslogtreecommitdiffstats
path: root/terraform-ci-infra/1n_nmd/grafana
diff options
context:
space:
mode:
Diffstat (limited to 'terraform-ci-infra/1n_nmd/grafana')
-rw-r--r--terraform-ci-infra/1n_nmd/grafana/conf/blackbox_exporter_http.json1030
-rw-r--r--terraform-ci-infra/1n_nmd/grafana/conf/blackbox_exporter_icmp.json368
-rw-r--r--terraform-ci-infra/1n_nmd/grafana/conf/consul.json1438
-rw-r--r--terraform-ci-infra/1n_nmd/grafana/conf/docker_cadvisor.json2040
-rw-r--r--terraform-ci-infra/1n_nmd/grafana/conf/node_exporter.json13696
-rw-r--r--terraform-ci-infra/1n_nmd/grafana/conf/nomad.json869
-rw-r--r--terraform-ci-infra/1n_nmd/grafana/conf/nomad/grafana.hcl331
-rw-r--r--terraform-ci-infra/1n_nmd/grafana/conf/prometheus.json3055
-rw-r--r--terraform-ci-infra/1n_nmd/grafana/main.tf24
-rw-r--r--terraform-ci-infra/1n_nmd/grafana/variables.tf66
10 files changed, 22917 insertions, 0 deletions
diff --git a/terraform-ci-infra/1n_nmd/grafana/conf/blackbox_exporter_http.json b/terraform-ci-infra/1n_nmd/grafana/conf/blackbox_exporter_http.json
new file mode 100644
index 0000000000..f9df1b239e
--- /dev/null
+++ b/terraform-ci-infra/1n_nmd/grafana/conf/blackbox_exporter_http.json
@@ -0,0 +1,1030 @@
+{
+ "__inputs": [
+ {
+ "name": "DS_PROMETHEUS",
+ "label": "signcl-prometheus",
+ "description": "",
+ "type": "datasource",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus"
+ }
+ ],
+ "__requires": [
+ {
+ "type": "grafana",
+ "id": "grafana",
+ "name": "Grafana",
+ "version": "5.2.2"
+ },
+ {
+ "type": "panel",
+ "id": "graph",
+ "name": "Graph",
+ "version": "5.0.0"
+ },
+ {
+ "type": "datasource",
+ "id": "prometheus",
+ "name": "Prometheus",
+ "version": "5.0.0"
+ },
+ {
+ "type": "panel",
+ "id": "singlestat",
+ "name": "Singlestat",
+ "version": "5.0.0"
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "description": "Prometheus Blackbox Exporter Overview",
+ "editable": true,
+ "gnetId": 7587,
+ "graphTooltip": 0,
+ "id": null,
+ "iteration": 1534695504413,
+ "links": [],
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 8,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 138,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "probe_duration_seconds{instance=~\"$target\"}",
+ "format": "time_series",
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "legendFormat": "{{ instance }}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Global Probe Duration",
+ "tooltip": {
+ "shared": true,
+ "sort": 1,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 8
+ },
+ "id": 15,
+ "panels": [],
+ "repeat": "target",
+ "title": "$target status",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 6,
+ "w": 10,
+ "x": 4,
+ "y": 9
+ },
+ "id": 25,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "probe_http_duration_seconds{instance=~\"$target\"}",
+ "format": "time_series",
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "legendFormat": "{{ phase }}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "HTTP Duration",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 6,
+ "w": 10,
+ "x": 14,
+ "y": 9
+ },
+ "id": 17,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "probe_duration_seconds{instance=~\"$target\"}",
+ "format": "time_series",
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "legendFormat": "seconds",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Probe Duration",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 0,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 4,
+ "x": 0,
+ "y": 11
+ },
+ "id": 20,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "minSpan": 3,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "repeat": null,
+ "repeatDirection": "h",
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "probe_http_status_code{instance=~\"$target\"}",
+ "format": "time_series",
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "201, 399",
+ "title": "HTTP Status Code",
+ "transparent": false,
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ },
+ {
+ "op": "=",
+ "text": "YES",
+ "value": "1"
+ },
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "0"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 4,
+ "x": 0,
+ "y": 13
+ },
+ "id": 27,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "probe_http_version{instance=~\"$target\"}",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "HTTP Version",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "#d44a3a",
+ "rgba(237, 129, 40, 0.89)",
+ "#299c46"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 4,
+ "x": 0,
+ "y": 15
+ },
+ "id": 18,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "minSpan": 3,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "repeat": null,
+ "repeatDirection": "v",
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "probe_http_ssl{instance=~\"$target\"}",
+ "format": "time_series",
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "0, 1",
+ "title": "SSL",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ },
+ {
+ "op": "=",
+ "text": "YES",
+ "value": "1"
+ },
+ {
+ "op": "=",
+ "text": "NO",
+ "value": "0"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "#d44a3a",
+ "rgba(237, 129, 40, 0.89)",
+ "#299c46"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "format": "dtdurations",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 10,
+ "x": 4,
+ "y": 15
+ },
+ "id": 19,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "minSpan": 3,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "repeat": null,
+ "repeatDirection": "h",
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "probe_ssl_earliest_cert_expiry{instance=~\"$target\"} - time()",
+ "format": "time_series",
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "0,1209600",
+ "timeFrom": null,
+ "title": "SSL Expiry",
+ "transparent": false,
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ },
+ {
+ "op": "=",
+ "text": "YES",
+ "value": "1"
+ },
+ {
+ "op": "=",
+ "text": "NO",
+ "value": "0"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "format": "s",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 5,
+ "x": 14,
+ "y": 15
+ },
+ "id": 23,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "repeat": null,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "avg(probe_duration_seconds{instance=~\"$target\"})",
+ "format": "time_series",
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Average Probe Duration",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "format": "s",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 5,
+ "x": 19,
+ "y": 15
+ },
+ "id": 24,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "repeat": null,
+ "repeatDirection": "h",
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "avg(probe_dns_lookup_time_seconds{instance=~\"$target\"})",
+ "format": "time_series",
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Average DNS Lookup",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ }
+ ],
+ "refresh": "10s",
+ "schemaVersion": 16,
+ "style": "dark",
+ "tags": [
+ "blackbox",
+ "prometheus"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "selected": false,
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": "datasource",
+ "multi": false,
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "type": "datasource"
+ },
+ {
+ "auto": true,
+ "auto_count": 10,
+ "auto_min": "10s",
+ "current": {
+ "text": "10s",
+ "value": "10s"
+ },
+ "hide": 0,
+ "label": "Interval",
+ "name": "interval",
+ "options": [
+ {
+ "selected": false,
+ "text": "auto",
+ "value": "$__auto_interval_interval"
+ },
+ {
+ "selected": false,
+ "text": "5s",
+ "value": "5s"
+ },
+ {
+ "selected": true,
+ "text": "10s",
+ "value": "10s"
+ },
+ {
+ "selected": false,
+ "text": "30s",
+ "value": "30s"
+ },
+ {
+ "selected": false,
+ "text": "1m",
+ "value": "1m"
+ },
+ {
+ "selected": false,
+ "text": "10m",
+ "value": "10m"
+ },
+ {
+ "selected": false,
+ "text": "30m",
+ "value": "30m"
+ },
+ {
+ "selected": false,
+ "text": "1h",
+ "value": "1h"
+ },
+ {
+ "selected": false,
+ "text": "6h",
+ "value": "6h"
+ },
+ {
+ "selected": false,
+ "text": "12h",
+ "value": "12h"
+ },
+ {
+ "selected": false,
+ "text": "1d",
+ "value": "1d"
+ },
+ {
+ "selected": false,
+ "text": "7d",
+ "value": "7d"
+ },
+ {
+ "selected": false,
+ "text": "14d",
+ "value": "14d"
+ },
+ {
+ "selected": false,
+ "text": "30d",
+ "value": "30d"
+ }
+ ],
+ "query": "5s,10s,30s,1m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
+ "refresh": 2,
+ "type": "interval"
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": true,
+ "name": "target",
+ "options": [],
+ "query": "label_values(probe_success, instance)",
+ "refresh": 1,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-4h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "HTTP Exporter",
+ "version": 1
+} \ No newline at end of file
diff --git a/terraform-ci-infra/1n_nmd/grafana/conf/blackbox_exporter_icmp.json b/terraform-ci-infra/1n_nmd/grafana/conf/blackbox_exporter_icmp.json
new file mode 100644
index 0000000000..df30506348
--- /dev/null
+++ b/terraform-ci-infra/1n_nmd/grafana/conf/blackbox_exporter_icmp.json
@@ -0,0 +1,368 @@
+{
+ "__inputs": [
+ {
+ "name": "DS_PROMETHEUS",
+ "label": "localhost",
+ "description": "",
+ "type": "datasource",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus"
+ }
+ ],
+ "__requires": [
+ {
+ "type": "grafana",
+ "id": "grafana",
+ "name": "Grafana",
+ "version": "6.5.2"
+ },
+ {
+ "type": "panel",
+ "id": "graph",
+ "name": "Graph",
+ "version": ""
+ },
+ {
+ "type": "panel",
+ "id": "heatmap",
+ "name": "Heatmap",
+ "version": ""
+ },
+ {
+ "type": "datasource",
+ "id": "prometheus",
+ "name": "Prometheus",
+ "version": "1.0.0"
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": 12412,
+ "graphTooltip": 0,
+ "id": null,
+ "iteration": 1591284149575,
+ "links": [],
+ "panels": [
+ {
+ "cards": {
+ "cardPadding": null,
+ "cardRound": null
+ },
+ "color": {
+ "cardColor": "#b4ff00",
+ "colorScale": "sqrt",
+ "colorScheme": "interpolateRdYlGn",
+ "exponent": 0.5,
+ "mode": "spectrum"
+ },
+ "dataFormat": "tsbuckets",
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 8,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "heatmap": {},
+ "hideZeroBuckets": false,
+ "highlightCards": true,
+ "id": 7,
+ "legend": {
+ "show": true
+ },
+ "options": {},
+ "reverseYBuckets": true,
+ "targets": [
+ {
+ "expr": "sum(probe_icmp_duration_seconds{phase=\"rtt\"}) by (instance)",
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "ICMP RTT",
+ "tooltip": {
+ "show": true,
+ "showHistogram": true
+ },
+ "type": "heatmap",
+ "xAxis": {
+ "show": true
+ },
+ "xBucketNumber": null,
+ "xBucketSize": null,
+ "yAxis": {
+ "decimals": null,
+ "format": "s",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true,
+ "splitFactor": null
+ },
+ "yBucketBound": "middle",
+ "yBucketNumber": null,
+ "yBucketSize": null
+ },
+ {
+ "cards": {
+ "cardPadding": null,
+ "cardRound": null
+ },
+ "color": {
+ "cardColor": "#b4ff00",
+ "colorScale": "sqrt",
+ "colorScheme": "interpolateRdYlGn",
+ "exponent": 0.5,
+ "mode": "spectrum"
+ },
+ "dataFormat": "tsbuckets",
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 8,
+ "w": 24,
+ "x": 0,
+ "y": 8
+ },
+ "heatmap": {},
+ "hideZeroBuckets": false,
+ "highlightCards": true,
+ "id": 8,
+ "legend": {
+ "show": true
+ },
+ "options": {},
+ "reverseYBuckets": true,
+ "targets": [
+ {
+ "expr": "1-avg_over_time(probe_success{instance=~\"$instance\"}[$__interval])",
+ "format": "time_series",
+ "hide": false,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "ICMP packet loss",
+ "tooltip": {
+ "show": true,
+ "showHistogram": true
+ },
+ "type": "heatmap",
+ "xAxis": {
+ "show": true
+ },
+ "xBucketNumber": null,
+ "xBucketSize": null,
+ "yAxis": {
+ "decimals": null,
+ "format": "percentunit",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true,
+ "splitFactor": null
+ },
+ "yBucketBound": "middle",
+ "yBucketNumber": null,
+ "yBucketSize": null
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "description": "This uses the blackbox exporter, which does not expose packet loss, for example. It could be improved with https://github.com/SuperQ/smokeping_prober because it also keeps track of lost samples (https://github.com/SuperQ/smokeping_prober/issues/24). Unfortunately, that still won't make graphs as nice as smokeping, because each probe only keeps one sample, instead of doing multiple like smokeping does (https://github.com/SuperQ/smokeping_prober/issues/36).",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 24,
+ "x": 0,
+ "y": 16
+ },
+ "hiddenSeries": false,
+ "id": 2,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "maxPerRow": 2,
+ "nullPointMode": "connected",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 0.5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": "instance",
+ "repeatDirection": "v",
+ "seriesOverrides": [
+ {
+ "alias": "packet loss",
+ "color": "#C4162A",
+ "lines": false,
+ "pointradius": 1,
+ "points": true,
+ "yaxis": 2
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": true,
+ "targets": [
+ {
+ "expr": "sum(probe_icmp_duration_seconds{phase=\"rtt\",instance=~\"$instance\"}) by (instance) > 0",
+ "instant": false,
+ "legendFormat": "RTT",
+ "refId": "A"
+ },
+ {
+ "expr": "1-avg_over_time(probe_success{instance=~\"$instance\"}[$__interval])",
+ "format": "time_series",
+ "legendFormat": "packet loss",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "ICMP round trip time ($instance)",
+ "tooltip": {
+ "shared": true,
+ "sort": 1,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "dtdurations",
+ "label": "RTT",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "percentunit",
+ "label": "packet loss",
+ "logBase": 1,
+ "max": "1",
+ "min": "0.0001",
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "refresh": false,
+ "schemaVersion": 21,
+ "style": "dark",
+ "tags": [],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "selected": false,
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": "datasource",
+ "multi": false,
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "definition": "label_values(probe_success, instance)",
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": true,
+ "name": "instance",
+ "options": [],
+ "query": "label_values(probe_success, instance)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-4h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ]
+ },
+ "timezone": "",
+ "title": "ICMP exporter",
+ "version": 1,
+ "description": "Graph ICMP metrics from the blackbox exporter, Smokeping-style"
+} \ No newline at end of file
diff --git a/terraform-ci-infra/1n_nmd/grafana/conf/consul.json b/terraform-ci-infra/1n_nmd/grafana/conf/consul.json
new file mode 100644
index 0000000000..2e4a36f076
--- /dev/null
+++ b/terraform-ci-infra/1n_nmd/grafana/conf/consul.json
@@ -0,0 +1,1438 @@
+{
+ "__inputs": [
+ {
+ "name": "DS_PROMETHEUS",
+ "label": "Prometheus",
+ "description": "",
+ "type": "datasource",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus"
+ }
+ ],
+ "__requires": [
+ {
+ "type": "grafana",
+ "id": "grafana",
+ "name": "Grafana",
+ "version": "4.3.0-beta1"
+ },
+ {
+ "type": "panel",
+ "id": "graph",
+ "name": "Graph",
+ "version": ""
+ },
+ {
+ "type": "datasource",
+ "id": "prometheus",
+ "name": "Prometheus",
+ "version": "1.0.0"
+ },
+ {
+ "type": "panel",
+ "id": "singlestat",
+ "name": "Singlestat",
+ "version": ""
+ }
+ ],
+ "annotations": {
+ "list": []
+ },
+ "editable": true,
+ "gnetId": 2351,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [],
+ "rows": [
+ {
+ "collapse": false,
+ "height": 153,
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 1,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "consul_raft_leader_lastcontact_count",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{host}}",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "",
+ "title": "Consul Leader",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "name"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "format": "none",
+ "gauge": {
+ "maxValue": 3,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 17,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "COUNT (changes(consul_memberlist_gossip_sum[1m]) > 0) BY (labels)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "1,2",
+ "title": "# servers in cluster",
+ "type": "singlestat",
+ "valueFontSize": "100%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "format": "percent",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 18,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(irate(node_cpu{mode=\"idle\", host=\"$consul\"}[1m])) * 100 / scalar(count(node_cpu{mode=\"user\", host=\"$consul\"}))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "",
+ "title": "CPU Idle",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "format": "none",
+ "gauge": {
+ "maxValue": 4,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 14,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "node_load1{host=\"$consul\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "1,2",
+ "title": "Load 1",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "format": "none",
+ "gauge": {
+ "maxValue": 4,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 15,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "node_load5{host=\"$consul\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "1,2",
+ "title": "Load 5",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "format": "none",
+ "gauge": {
+ "maxValue": 4,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 16,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "node_load15{host=\"$consul\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "1,2",
+ "title": "Load 15",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "The per-second rate of TCP messages that are sent/received from the server.",
+ "fill": 1,
+ "id": 3,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(consul_memberlist_tcp{host=\"$consul\"}[1m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{type}}",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memberlist TCP Messages",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "The per-second rate of UDP messages that are sent/received from the server.",
+ "fill": 1,
+ "id": 5,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(consul_memberlist_udp{host=\"$consul\"}[1m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{type}}",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memberlist UDP Messages",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "This measures the time it takes to replicate log entries to followers. This is a general indicator of the load pressure on the Consul servers, as well as the performance of the communication between the servers.",
+ "fill": 1,
+ "id": 6,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "consul_raft_replication_appendEntries_rpc",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{query}} - {{quantile}}%",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Log replication from leader to servers",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "id": 7,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "consul_raft_replication_heartbeat",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{query}} - {{quantile}}%",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Raft replication heartbeat",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "This measures the time it takes for the leader to write log entries to disk.",
+ "fill": 1,
+ "id": 8,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "consul_raft_leader_dispatchLog",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{quantile}}%",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Write logs",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "This measures the time it takes to commit a new entry to the Raft log on the leader.",
+ "fill": 1,
+ "id": 4,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "consul_raft_commitTime",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{quantile}}%",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Commit time Leader",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "This counts the number of Raft transactions occurring over the interval, which is a general indicator of the write load on the Consul servers.",
+ "fill": 1,
+ "id": 9,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "delta(consul_raft_apply[30s])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Transactions",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Raft Transactions",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "This will only be emitted by the Raft leader and measures the time since the leader was last able to contact the follower nodes when checking its leader lease. It can be used as a measure for how stable the Raft timing is and how close the leader is to timing out its lease.\n\nThe lease timeout is 500 ms times the raft_multiplier configuration, so this telemetry value should not be getting close to that configured value, otherwise the Raft timing is marginal and might need to be tuned, or more powerful servers might be needed. See the Server Performance guide for more details.",
+ "fill": 1,
+ "id": 10,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "consul_raft_leader_lastcontact",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{quantile}}%",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Leader lastContact",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "id": 12,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "delta(consul_rpc_query{host=\"$consul\"}[30s])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Requests",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "RPC Requests",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "Consul uses a network tomography system to compute network coordinates for nodes in the cluster. These coordinates allow the network round trip time to be estimated between any two nodes using a very simple calculation. This allows for many useful applications, such as finding the service node nearest a requesting node, or failing over to services in the next closest datacenter.",
+ "fill": 1,
+ "id": 13,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "consul_serf_coordinate_adjustment_ms{host=\"$consul\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{quantile}}%",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Serf Coordinates",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6"
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "selected": false,
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": "datasource",
+ "multi": false,
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "consul",
+ "options": [],
+ "query": "label_values(consul_memberlist_gossip_sum, host)",
+ "refresh": 1,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-4h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "Consul",
+ "version": 1
+} \ No newline at end of file
diff --git a/terraform-ci-infra/1n_nmd/grafana/conf/docker_cadvisor.json b/terraform-ci-infra/1n_nmd/grafana/conf/docker_cadvisor.json
new file mode 100644
index 0000000000..bbad614bb4
--- /dev/null
+++ b/terraform-ci-infra/1n_nmd/grafana/conf/docker_cadvisor.json
@@ -0,0 +1,2040 @@
+{
+ "__inputs": [
+ {
+ "name": "DS_PROMETHEUS",
+ "label": "Prometheus",
+ "description": "",
+ "type": "datasource",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus"
+ }
+ ],
+ "__requires": [
+ {
+ "type": "grafana",
+ "id": "grafana",
+ "name": "Grafana",
+ "version": "6.2.4"
+ },
+ {
+ "type": "panel",
+ "id": "graph",
+ "name": "Graph",
+ "version": ""
+ },
+ {
+ "type": "datasource",
+ "id": "prometheus",
+ "name": "Prometheus",
+ "version": "1.0.0"
+ },
+ {
+ "type": "panel",
+ "id": "singlestat",
+ "name": "Singlestat",
+ "version": ""
+ },
+ {
+ "type": "panel",
+ "id": "table",
+ "name": "Table",
+ "version": ""
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "description": "A simple overview of the most important Docker host and container metrics. (cAdvisor/Prometheus)",
+ "editable": true,
+ "gnetId": 10657,
+ "graphTooltip": 1,
+ "id": null,
+ "iteration": 1564715574785,
+ "links": [],
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 0,
+ "editable": true,
+ "error": false,
+ "format": "s",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 0,
+ "y": 0
+ },
+ "height": "",
+ "id": 24,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "options": {},
+ "postfix": "",
+ "postfixFontSize": "30%",
+ "prefix": "",
+ "prefixFontSize": "20%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "time() - node_boot_time_seconds{instance=~\"$node:.*\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": "",
+ "title": "Uptime",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "editable": true,
+ "error": false,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 4,
+ "y": 0
+ },
+ "id": 31,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "options": {},
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "count(container_last_seen{instance=~\"$node:$port\",job=~\"$job\",image!=\"\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": "",
+ "title": "Containers",
+ "type": "singlestat",
+ "valueFontSize": "120%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 0,
+ "editable": true,
+ "error": false,
+ "format": "decbytes",
+ "gauge": {
+ "maxValue": 500000000,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 8,
+ "y": 0
+ },
+ "id": 30,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "options": {},
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "(node_memory_SwapTotal_bytes{instance=~'$node:9100'} - node_memory_SwapFree_bytes{instance=~'$node:9100'})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": "400000000",
+ "title": "Swap",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 0,
+ "editable": true,
+ "error": false,
+ "format": "percentunit",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 12,
+ "y": 0
+ },
+ "id": 27,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "options": {},
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(50, 189, 31, 0.18)",
+ "full": false,
+ "lineColor": "rgb(69, 193, 31)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "node_load1{instance=~\"$node:9100\"} / count by(job, instance)(count by(job, instance, cpu)(node_cpu_seconds_total{instance=~\"$node:9100\"}))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": "0.8,0.9",
+ "title": "Load",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ },
+ {
+ "alert": {
+ "conditions": [
+ {
+ "evaluator": {
+ "params": [
+ 10000000000
+ ],
+ "type": "gt"
+ },
+ "query": {
+ "params": [
+ "A",
+ "5m",
+ "now"
+ ]
+ },
+ "reducer": {
+ "params": [],
+ "type": "avg"
+ },
+ "type": "query"
+ }
+ ],
+ "executionErrorState": "alerting",
+ "frequency": "60s",
+ "handler": 1,
+ "name": "Available Memory alert",
+ "noDataState": "keep_state",
+ "notifications": [
+ {
+ "id": 1
+ }
+ ]
+ },
+ "aliasColors": {
+ "Available Memory": "#7EB26D",
+ "Unavailable Memory": "#7EB26D"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "gridPos": {
+ "h": 10,
+ "w": 4,
+ "x": 16,
+ "y": 0
+ },
+ "id": 20,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "container_memory_rss{name=~\".+\"}",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "D",
+ "step": 20
+ },
+ {
+ "expr": "sum(container_memory_rss{name=~\".+\"})",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "A",
+ "step": 20
+ },
+ {
+ "expr": "container_memory_usage_bytes{name=~\".+\"}",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "B",
+ "step": 20
+ },
+ {
+ "expr": "container_memory_rss{id=\"/\"}",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "C",
+ "step": 20
+ },
+ {
+ "expr": "sum(container_memory_rss)",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "E",
+ "step": 20
+ },
+ {
+ "expr": "node_memory_Buffers",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "node_memory_Buffers",
+ "refId": "N",
+ "step": 30
+ },
+ {
+ "expr": "node_memory_MemFree",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "F",
+ "step": 20
+ },
+ {
+ "expr": "node_memory_MemAvailable",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "Available Memory",
+ "refId": "H",
+ "step": 20
+ },
+ {
+ "expr": "node_memory_MemTotal_bytes{instance=~\"$node:9100\"} - node_memory_MemAvailable_bytes{instance=~\"$node:9100\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Unavailable Memory",
+ "refId": "G",
+ "step": 600
+ },
+ {
+ "expr": "node_memory_Inactive",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "I",
+ "step": 30
+ },
+ {
+ "expr": "node_memory_KernelStack",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "J",
+ "step": 30
+ },
+ {
+ "expr": "node_memory_Active",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "K",
+ "step": 30
+ },
+ {
+ "expr": "node_memory_MemTotal - (node_memory_Active + node_memory_MemFree + node_memory_Inactive)",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "Unknown",
+ "refId": "L",
+ "step": 40
+ },
+ {
+ "expr": "node_memory_MemFree + node_memory_Inactive ",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "M",
+ "step": 30
+ },
+ {
+ "expr": "container_memory_rss{name=~\".+\"}",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "O",
+ "step": 30
+ },
+ {
+ "expr": "node_memory_Inactive + node_memory_MemFree + node_memory_MemAvailable",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "P",
+ "step": 40
+ }
+ ],
+ "thresholds": [
+ {
+ "colorMode": "critical",
+ "fill": true,
+ "line": true,
+ "op": "gt",
+ "value": 10000000000,
+ "yaxis": "left"
+ }
+ ],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Unavailable Memory",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": false,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "",
+ "logBase": 1,
+ "max": 16000000000,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "alert": {
+ "conditions": [
+ {
+ "evaluator": {
+ "params": [
+ 850000000000
+ ],
+ "type": "gt"
+ },
+ "query": {
+ "params": [
+ "A",
+ "5m",
+ "now"
+ ]
+ },
+ "reducer": {
+ "params": [],
+ "type": "avg"
+ },
+ "type": "query"
+ }
+ ],
+ "executionErrorState": "alerting",
+ "frequency": "60s",
+ "handler": 1,
+ "name": "Free/Used Disk Space alert",
+ "noDataState": "keep_state",
+ "notifications": [
+ {
+ "id": 1
+ }
+ ]
+ },
+ "aliasColors": {
+ "Belegete Festplatte": "#BF1B00",
+ "Free Disk Space": "#7EB26D",
+ "Used Disk Space": "#7EB26D",
+ "{}": "#BF1B00"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "gridPos": {
+ "h": 10,
+ "w": 4,
+ "x": 20,
+ "y": 0
+ },
+ "id": 13,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "Used Disk Space",
+ "yaxis": 1
+ }
+ ],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_filesystem_size_bytes{fstype=\"rootfs\"} - node_filesystem_free_bytes{fstype=\"rootfs\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Used Disk Space",
+ "refId": "A",
+ "step": 600
+ }
+ ],
+ "thresholds": [
+ {
+ "colorMode": "critical",
+ "fill": true,
+ "line": true,
+ "op": "gt",
+ "value": 850000000000
+ }
+ ],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Used Disk Space",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": false,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "",
+ "logBase": 1,
+ "max": 1000000000000,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "SENT": "#BF1B00"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 0,
+ "y": 4
+ },
+ "id": 19,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "options": {},
+ "percentage": false,
+ "pointradius": 1,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(container_network_receive_bytes_total{id=\"/\"}[$interval])) by (id)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "RECEIVED",
+ "refId": "A",
+ "step": 600
+ },
+ {
+ "expr": "- sum(rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])) by (id)",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "SENT",
+ "refId": "B",
+ "step": 600
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Network Traffic",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": false,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 0,
+ "editable": true,
+ "error": false,
+ "format": "percent",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 4,
+ "y": 4
+ },
+ "id": 25,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "options": {},
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "((node_memory_MemTotal_bytes{instance=~\"$node:9100\"} - node_memory_MemAvailable_bytes{instance=~\"$node:9100\"}) / node_memory_MemTotal_bytes{instance=~\"$node:9100\"}) * 100",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": "70, 90",
+ "title": "Memory",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "aliasColors": {
+ "{id=\"/\",instance=\"cadvisor:8080\",job=\"prometheus\"}": "#BA43A9"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 8,
+ "y": 4
+ },
+ "id": 5,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(container_cpu_system_seconds_total[1m]))",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "a",
+ "refId": "B",
+ "step": 120
+ },
+ {
+ "expr": "sum(rate(container_cpu_system_seconds_total{name=~\".+\"}[1m]))",
+ "format": "time_series",
+ "hide": true,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "nur container",
+ "refId": "F",
+ "step": 10
+ },
+ {
+ "expr": "sum(rate(container_cpu_system_seconds_total{id=\"/\"}[1m]))",
+ "format": "time_series",
+ "hide": true,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "nur docker host",
+ "metric": "",
+ "refId": "A",
+ "step": 20
+ },
+ {
+ "expr": "sum(rate(process_cpu_seconds_total[$interval])) * 100",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "host",
+ "metric": "",
+ "refId": "C",
+ "step": 600
+ },
+ {
+ "expr": "sum(rate(container_cpu_system_seconds_total{name=~\".+\"}[1m])) + sum(rate(container_cpu_system_seconds_total{id=\"/\"}[1m])) + sum(rate(process_cpu_seconds_total[1m]))",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "D",
+ "step": 120
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "CPU Usage",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": false,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percent",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "IN on /sda": "#7EB26D",
+ "OUT on /sda": "#890F02"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 12,
+ "y": 4
+ },
+ "id": 3,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "-sum(rate(node_disk_read_bytes_total[$interval])) by (device)",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "OUT on /{{device}}",
+ "metric": "node_disk_bytes_read",
+ "refId": "A",
+ "step": 600
+ },
+ {
+ "expr": "sum(rate(node_disk_written_bytes_total[$interval])) by (device)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "IN on /{{device}}",
+ "metric": "",
+ "refId": "B",
+ "step": 600
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Disk I/O",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": false,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 10
+ },
+ "id": 8,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(container_network_receive_bytes_total{instance=~\"$node:$port\",job=~\"$job\",image!=\"\"}[$interval])) by (name)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "- rate(container_network_transmit_bytes_total{name=~\".+\"}[$interval])",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "B",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Received Network Traffic per Container",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 10
+ },
+ "id": 9,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(container_network_transmit_bytes_total{instance=~\"$node:$port\",job=~\"$job\",image!=\"\"}[$interval])) by (name)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "B",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Sent Network Traffic per Container",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": "",
+ "logBase": 10,
+ "max": 8,
+ "min": 0,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "editable": true,
+ "error": false,
+ "fill": 5,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 17
+ },
+ "id": 1,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(container_cpu_usage_seconds_total{instance=~\"$node:$port\",job=~\"$job\",image!=\"\"}[$interval])) by (name) * 100",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "metric": "",
+ "refId": "F",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "CPU Usage per Container",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percent",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "editable": true,
+ "error": false,
+ "fill": 3,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 17
+ },
+ "id": 34,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(container_memory_swap{instance=~\"$node:$port\",job=~\"$job\",image!=\"\"}) by (name)",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "container_memory_usage_bytes{name=~\".+\"}",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Swap per Container",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "editable": true,
+ "error": false,
+ "fill": 3,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 24
+ },
+ "id": 10,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(container_memory_rss{instance=~\"$node:$port\",job=~\"$job\",image!=\"\"}) by (name)",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "container_memory_usage_bytes{name=~\".+\"}",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Usage per Container",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "columns": [
+ {
+ "text": "Current",
+ "value": "current"
+ }
+ ],
+ "editable": true,
+ "error": false,
+ "fontSize": "100%",
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 24
+ },
+ "id": 36,
+ "links": [],
+ "options": {},
+ "pageSize": null,
+ "scroll": true,
+ "showHeader": true,
+ "sort": {
+ "col": 0,
+ "desc": true
+ },
+ "styles": [
+ {
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "decimals": 2,
+ "pattern": "/.*/",
+ "thresholds": [
+ "10000000",
+ "25000000"
+ ],
+ "type": "number",
+ "unit": "decbytes"
+ }
+ ],
+ "targets": [
+ {
+ "expr": "sum(container_spec_memory_limit_bytes{name=~\".+\"} - container_memory_usage_bytes{name=~\".+\"}) by (name) ",
+ "format": "table",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "metric": "",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "sum(container_spec_memory_limit_bytes{instance=~\"$node:$port\",job=~\"$job\",image!=\"\"}) by (name) ",
+ "format": "table",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "B",
+ "step": 240
+ },
+ {
+ "expr": "container_memory_usage_bytes{name=~\".+\"}",
+ "format": "table",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "C",
+ "step": 240
+ }
+ ],
+ "title": "Limit memory",
+ "transform": "table",
+ "type": "table"
+ }
+ ],
+ "refresh": "5m",
+ "schemaVersion": 18,
+ "style": "dark",
+ "tags": [],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "selected": false,
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": "datasource",
+ "multi": false,
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "definition": "label_values(container_cpu_user_seconds_total, job)",
+ "hide": 0,
+ "includeAll": false,
+ "label": "Job",
+ "multi": false,
+ "name": "job",
+ "options": [],
+ "query": "label_values(container_cpu_user_seconds_total, job)",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "definition": "label_values(container_cpu_user_seconds_total{job=~\"$job\"}, instance)",
+ "hide": 0,
+ "includeAll": false,
+ "label": "Host",
+ "multi": false,
+ "name": "node",
+ "options": [],
+ "query": "label_values(container_cpu_user_seconds_total{job=~\"$job\"}, instance)",
+ "refresh": 1,
+ "regex": "/([^:]+):.*/",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": null,
+ "tags": [],
+ "tagsQuery": null,
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "definition": "label_values(container_cpu_user_seconds_total{instance=~\"$node:(.*)\"}, instance)",
+ "hide": 0,
+ "includeAll": false,
+ "label": "Port",
+ "multi": false,
+ "name": "port",
+ "options": [],
+ "query": "label_values(container_cpu_user_seconds_total{instance=~\"$node:(.*)\"}, instance)",
+ "refresh": 1,
+ "regex": "/[^:]+:(.*)/",
+ "skipUrlSync": false,
+ "sort": 3,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "auto": true,
+ "auto_count": 30,
+ "auto_min": "50s",
+ "current": {
+ "text": "1m",
+ "value": "1m"
+ },
+ "hide": 0,
+ "label": "Interval",
+ "name": "interval",
+ "options": [
+ {
+ "selected": false,
+ "text": "auto",
+ "value": "$__auto_interval_interval"
+ },
+ {
+ "selected": true,
+ "text": "1m",
+ "value": "1m"
+ },
+ {
+ "selected": false,
+ "text": "5m",
+ "value": "5m"
+ },
+ {
+ "selected": false,
+ "text": "10m",
+ "value": "10m"
+ },
+ {
+ "selected": false,
+ "text": "30m",
+ "value": "30m"
+ },
+ {
+ "selected": false,
+ "text": "1h",
+ "value": "1h"
+ },
+ {
+ "selected": false,
+ "text": "6h",
+ "value": "6h"
+ },
+ {
+ "selected": false,
+ "text": "12h",
+ "value": "12h"
+ },
+ {
+ "selected": false,
+ "text": "1d",
+ "value": "1d"
+ },
+ {
+ "selected": false,
+ "text": "7d",
+ "value": "7d"
+ },
+ {
+ "selected": false,
+ "text": "14d",
+ "value": "14d"
+ },
+ {
+ "selected": false,
+ "text": "30d",
+ "value": "30d"
+ }
+ ],
+ "query": "1m,5m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
+ "refresh": 2,
+ "skipUrlSync": false,
+ "type": "interval"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-4h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "Docker cAdvisor",
+ "version": 1
+}
diff --git a/terraform-ci-infra/1n_nmd/grafana/conf/node_exporter.json b/terraform-ci-infra/1n_nmd/grafana/conf/node_exporter.json
new file mode 100644
index 0000000000..766d5afec3
--- /dev/null
+++ b/terraform-ci-infra/1n_nmd/grafana/conf/node_exporter.json
@@ -0,0 +1,13696 @@
+{
+ "__inputs": [
+ {
+ "name": "DS_PROMETHEUS",
+ "label": "Prometheus",
+ "description": "",
+ "type": "datasource",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus"
+ }
+ ],
+ "__requires": [
+ {
+ "type": "panel",
+ "id": "gauge",
+ "name": "Gauge",
+ "version": ""
+ },
+ {
+ "type": "grafana",
+ "id": "grafana",
+ "name": "Grafana",
+ "version": "6.7.3"
+ },
+ {
+ "type": "panel",
+ "id": "graph",
+ "name": "Graph",
+ "version": ""
+ },
+ {
+ "type": "datasource",
+ "id": "prometheus",
+ "name": "Prometheus",
+ "version": "1.0.0"
+ },
+ {
+ "type": "panel",
+ "id": "singlestat",
+ "name": "Singlestat",
+ "version": ""
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "$$hashKey": "object:1058",
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": 1860,
+ "graphTooltip": 0,
+ "id": null,
+ "iteration": 1595837627257,
+ "links": [],
+ "panels": [
+ {
+ "collapsed": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 261,
+ "panels": [],
+ "repeat": null,
+ "title": "Quick CPU / Mem / Disk",
+ "type": "row"
+ },
+ {
+ "cacheTimeout": null,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "Busy state of all CPU cores together",
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 0,
+ "y": 1
+ },
+ "id": 20,
+ "links": [],
+ "options": {
+ "fieldOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "id": 0,
+ "op": "=",
+ "text": "N/A",
+ "type": 1,
+ "value": "null"
+ }
+ ],
+ "max": 100,
+ "min": 0,
+ "nullValueMode": "null",
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "rgba(50, 172, 45, 0.97)",
+ "value": null
+ },
+ {
+ "color": "rgba(237, 129, 40, 0.89)",
+ "value": 85
+ },
+ {
+ "color": "rgba(245, 54, 54, 0.9)",
+ "value": 95
+ }
+ ]
+ },
+ "unit": "percent"
+ },
+ "overrides": [],
+ "values": false
+ },
+ "orientation": "horizontal",
+ "showThresholdLabels": false,
+ "showThresholdMarkers": true
+ },
+ "pluginVersion": "6.7.3",
+ "targets": [
+ {
+ "expr": "(((count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))) - avg(sum by (mode)(irate(node_cpu_seconds_total{mode='idle',instance=\"$node\",job=\"$job\"}[5m])))) * 100) / count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))",
+ "hide": false,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 900
+ }
+ ],
+ "title": "CPU Busy",
+ "type": "gauge"
+ },
+ {
+ "cacheTimeout": null,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "System load (5 min average) as a percentage of the number of CPU cores",
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 3,
+ "y": 1
+ },
+ "id": 155,
+ "links": [],
+ "options": {
+ "fieldOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "id": 0,
+ "op": "=",
+ "text": "N/A",
+ "type": 1,
+ "value": "null"
+ }
+ ],
+ "max": 100,
+ "min": 0,
+ "nullValueMode": "null",
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "rgba(50, 172, 45, 0.97)",
+ "value": null
+ },
+ {
+ "color": "rgba(237, 129, 40, 0.89)",
+ "value": 85
+ },
+ {
+ "color": "rgba(245, 54, 54, 0.9)",
+ "value": 95
+ }
+ ]
+ },
+ "unit": "percent"
+ },
+ "overrides": [],
+ "values": false
+ },
+ "orientation": "horizontal",
+ "showThresholdLabels": false,
+ "showThresholdMarkers": true
+ },
+ "pluginVersion": "6.7.3",
+ "targets": [
+ {
+ "expr": "avg(node_load5{instance=\"$node\",job=\"$job\"}) / count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)) * 100",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 900
+ }
+ ],
+ "title": "Sys Load (5m avg)",
+ "type": "gauge"
+ },
+ {
+ "cacheTimeout": null,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "System load (15 min average) as a percentage of the number of CPU cores",
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 6,
+ "y": 1
+ },
+ "id": 19,
+ "links": [],
+ "options": {
+ "fieldOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "id": 0,
+ "op": "=",
+ "text": "N/A",
+ "type": 1,
+ "value": "null"
+ }
+ ],
+ "max": 100,
+ "min": 0,
+ "nullValueMode": "null",
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "rgba(50, 172, 45, 0.97)",
+ "value": null
+ },
+ {
+ "color": "rgba(237, 129, 40, 0.89)",
+ "value": 85
+ },
+ {
+ "color": "rgba(245, 54, 54, 0.9)",
+ "value": 95
+ }
+ ]
+ },
+ "unit": "percent"
+ },
+ "overrides": [],
+ "values": false
+ },
+ "orientation": "horizontal",
+ "showThresholdLabels": false,
+ "showThresholdMarkers": true
+ },
+ "pluginVersion": "6.7.3",
+ "targets": [
+ {
+ "expr": "avg(node_load15{instance=\"$node\",job=\"$job\"}) / count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)) * 100",
+ "hide": false,
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 900
+ }
+ ],
+ "title": "Sys Load (15m avg)",
+ "type": "gauge"
+ },
+ {
+ "cacheTimeout": null,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "Percentage of RAM that is not available",
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 9,
+ "y": 1
+ },
+ "hideTimeOverride": false,
+ "id": 16,
+ "links": [],
+ "options": {
+ "fieldOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "decimals": 0,
+ "mappings": [],
+ "max": 100,
+ "min": 0,
+ "nullValueMode": "null",
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "rgba(50, 172, 45, 0.97)",
+ "value": null
+ },
+ {
+ "color": "rgba(237, 129, 40, 0.89)",
+ "value": 80
+ },
+ {
+ "color": "rgba(245, 54, 54, 0.9)",
+ "value": 90
+ }
+ ]
+ },
+ "unit": "percent"
+ },
+ "overrides": [],
+ "values": false
+ },
+ "orientation": "horizontal",
+ "showThresholdLabels": false,
+ "showThresholdMarkers": true
+ },
+ "pluginVersion": "6.7.3",
+ "targets": [
+ {
+ "expr": "((node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"}) / (node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} )) * 100",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 900
+ },
+ {
+ "expr": "100 - ((node_memory_MemAvailable_bytes{instance=\"$node\",job=\"$job\"} * 100) / node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"})",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 1,
+ "refId": "B",
+ "step": 900
+ }
+ ],
+ "title": "RAM Used",
+ "type": "gauge"
+ },
+ {
+ "cacheTimeout": null,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "Used Swap",
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 12,
+ "y": 1
+ },
+ "id": 21,
+ "links": [],
+ "options": {
+ "fieldOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "id": 0,
+ "op": "=",
+ "text": "N/A",
+ "type": 1,
+ "value": "null"
+ }
+ ],
+ "max": 100,
+ "min": 0,
+ "nullValueMode": "null",
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "rgba(50, 172, 45, 0.97)",
+ "value": null
+ },
+ {
+ "color": "rgba(237, 129, 40, 0.89)",
+ "value": 10
+ },
+ {
+ "color": "rgba(245, 54, 54, 0.9)",
+ "value": 25
+ }
+ ]
+ },
+ "unit": "percent"
+ },
+ "overrides": [],
+ "values": false
+ },
+ "orientation": "horizontal",
+ "showThresholdLabels": false,
+ "showThresholdMarkers": true
+ },
+ "pluginVersion": "6.7.3",
+ "targets": [
+ {
+ "expr": "((node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"}) / (node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} )) * 100",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 900
+ }
+ ],
+ "title": "SWAP Used",
+ "type": "gauge"
+ },
+ {
+ "cacheTimeout": null,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "Used Root FS",
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 15,
+ "y": 1
+ },
+ "id": 154,
+ "links": [],
+ "options": {
+ "fieldOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "id": 0,
+ "op": "=",
+ "text": "N/A",
+ "type": 1,
+ "value": "null"
+ }
+ ],
+ "max": 100,
+ "min": 0,
+ "nullValueMode": "null",
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "rgba(50, 172, 45, 0.97)",
+ "value": null
+ },
+ {
+ "color": "rgba(237, 129, 40, 0.89)",
+ "value": 80
+ },
+ {
+ "color": "rgba(245, 54, 54, 0.9)",
+ "value": 90
+ }
+ ]
+ },
+ "unit": "percent"
+ },
+ "overrides": [],
+ "values": false
+ },
+ "orientation": "horizontal",
+ "showThresholdLabels": false,
+ "showThresholdMarkers": true
+ },
+ "pluginVersion": "6.7.3",
+ "targets": [
+ {
+ "expr": "100 - ((node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"} * 100) / node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"})",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 900
+ }
+ ],
+ "title": "Root FS Used",
+ "type": "gauge"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "Total number of CPU cores",
+ "format": "short",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 2,
+ "x": 18,
+ "y": 1
+ },
+ "id": 14,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 900
+ }
+ ],
+ "thresholds": "",
+ "title": "CPU Cores",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 1,
+ "description": "System uptime",
+ "format": "s",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 4,
+ "x": 20,
+ "y": 1
+ },
+ "hideTimeOverride": true,
+ "id": 15,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "$$hashKey": "object:1094",
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "$$hashKey": "object:1095",
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "null",
+ "nullText": null,
+ "postfix": "s",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "node_time_seconds{instance=\"$node\",job=\"$job\"} - node_boot_time_seconds{instance=\"$node\",job=\"$job\"}",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": "",
+ "title": "Uptime",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "$$hashKey": "object:1097",
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 0,
+ "description": "Total RootFS",
+ "format": "bytes",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 2,
+ "x": 18,
+ "y": 3
+ },
+ "id": 23,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 900
+ }
+ ],
+ "thresholds": "70,90",
+ "title": "RootFS Total",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 0,
+ "description": "Total RAM",
+ "format": "bytes",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 2,
+ "x": 20,
+ "y": 3
+ },
+ "id": 75,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "70%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"}",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 900
+ }
+ ],
+ "thresholds": "",
+ "title": "RAM Total",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 0,
+ "description": "Total SWAP",
+ "format": "bytes",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 2,
+ "x": 22,
+ "y": 3
+ },
+ "id": 18,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "70%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"}",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 900
+ }
+ ],
+ "thresholds": "",
+ "title": "SWAP Total",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "collapsed": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 5
+ },
+ "id": 263,
+ "panels": [],
+ "repeat": null,
+ "title": "Basic CPU / Mem / Net / Disk",
+ "type": "row"
+ },
+ {
+ "aliasColors": {
+ "Busy": "#EAB839",
+ "Busy Iowait": "#890F02",
+ "Busy other": "#1F78C1",
+ "Idle": "#052B51",
+ "Idle - Waiting for something to happen": "#052B51",
+ "guest": "#9AC48A",
+ "idle": "#052B51",
+ "iowait": "#EAB839",
+ "irq": "#BF1B00",
+ "nice": "#C15C17",
+ "softirq": "#E24D42",
+ "steal": "#FCE2DE",
+ "system": "#508642",
+ "user": "#5195CE"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "description": "Basic CPU info",
+ "fill": 4,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 6
+ },
+ "hiddenSeries": false,
+ "id": 77,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 250,
+ "sort": null,
+ "sortDesc": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": true,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "Busy Iowait",
+ "color": "#890F02"
+ },
+ {
+ "alias": "Idle",
+ "color": "#7EB26D"
+ },
+ {
+ "alias": "Busy System",
+ "color": "#EAB839"
+ },
+ {
+ "alias": "Busy User",
+ "color": "#0A437C"
+ },
+ {
+ "alias": "Busy Other",
+ "color": "#6D1F62"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by (instance)(irate(node_cpu_seconds_total{mode=\"system\",instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Busy System",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "sum by (instance)(irate(node_cpu_seconds_total{mode='user',instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Busy User",
+ "refId": "B",
+ "step": 240
+ },
+ {
+ "expr": "sum by (instance)(irate(node_cpu_seconds_total{mode='iowait',instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Busy Iowait",
+ "refId": "C",
+ "step": 240
+ },
+ {
+ "expr": "sum by (instance)(irate(node_cpu_seconds_total{mode=~\".*irq\",instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Busy IRQs",
+ "refId": "D",
+ "step": 240
+ },
+ {
+ "expr": "sum (irate(node_cpu_seconds_total{mode!='idle',mode!='user',mode!='system',mode!='iowait',mode!='irq',mode!='softirq',instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Busy Other",
+ "refId": "E",
+ "step": 240
+ },
+ {
+ "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='idle',instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Idle",
+ "refId": "F",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "CPU Basic",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "",
+ "logBase": 1,
+ "max": "100",
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Apps": "#629E51",
+ "Buffers": "#614D93",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Free": "#0A437C",
+ "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF",
+ "Inactive": "#584477",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "RAM_Free": "#E0F9D7",
+ "SWAP Used": "#BF1B00",
+ "Slab": "#806EB7",
+ "Slab_Cache": "#E0752D",
+ "Swap": "#BF1B00",
+ "Swap Used": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Swap_Free": "#2F575E",
+ "Unused": "#EAB839"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "description": "Basic memory usage",
+ "fill": 4,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 6
+ },
+ "hiddenSeries": false,
+ "id": 78,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 350,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "RAM Total",
+ "color": "#E0F9D7",
+ "fill": 0,
+ "stack": false
+ },
+ {
+ "alias": "RAM Cache + Buffer",
+ "color": "#052B51"
+ },
+ {
+ "alias": "RAM Free",
+ "color": "#7EB26D"
+ },
+ {
+ "alias": "Available",
+ "color": "#DEDAF7",
+ "fill": 0,
+ "stack": false
+ }
+ ],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "RAM Total",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"} - (node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} + node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"})",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "RAM Used",
+ "refId": "B",
+ "step": 240
+ },
+ {
+ "expr": "node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} + node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "RAM Cache + Buffer",
+ "refId": "C",
+ "step": 240
+ },
+ {
+ "expr": "node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "RAM Free",
+ "refId": "D",
+ "step": 240
+ },
+ {
+ "expr": "(node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "SWAP Used",
+ "refId": "E",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Basic",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Recv_bytes_eth2": "#7EB26D",
+ "Recv_bytes_lo": "#0A50A1",
+ "Recv_drop_eth2": "#6ED0E0",
+ "Recv_drop_lo": "#E0F9D7",
+ "Recv_errs_eth2": "#BF1B00",
+ "Recv_errs_lo": "#CCA300",
+ "Trans_bytes_eth2": "#7EB26D",
+ "Trans_bytes_lo": "#0A50A1",
+ "Trans_drop_eth2": "#6ED0E0",
+ "Trans_drop_lo": "#E0F9D7",
+ "Trans_errs_eth2": "#BF1B00",
+ "Trans_errs_lo": "#CCA300",
+ "recv_bytes_lo": "#0A50A1",
+ "recv_drop_eth0": "#99440A",
+ "recv_drop_lo": "#967302",
+ "recv_errs_eth0": "#BF1B00",
+ "recv_errs_lo": "#890F02",
+ "trans_bytes_eth0": "#7EB26D",
+ "trans_bytes_lo": "#0A50A1",
+ "trans_drop_eth0": "#99440A",
+ "trans_drop_lo": "#967302",
+ "trans_errs_eth0": "#BF1B00",
+ "trans_errs_lo": "#890F02"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "Basic network info per interface",
+ "fill": 4,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 13
+ },
+ "hiddenSeries": false,
+ "id": 74,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*trans.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_network_receive_bytes_total{instance=\"$node\",job=\"$job\"}[5m])*8",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "recv {{device}}",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "irate(node_network_transmit_bytes_total{instance=\"$node\",job=\"$job\"}[5m])*8",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "trans {{device}} ",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Network Traffic Basic",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "pps",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 3,
+ "description": "Disk space used on all mounted filesystems",
+ "fill": 4,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 13
+ },
+ "height": "",
+ "hiddenSeries": false,
+ "id": 152,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "100 - ((node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'} * 100) / node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{mountpoint}}",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Disk Space Used Basic",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percent",
+ "label": null,
+ "logBase": 1,
+ "max": "100",
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 20
+ },
+ "id": 265,
+ "panels": [
+ {
+ "aliasColors": {
+ "Idle - Waiting for something to happen": "#052B51",
+ "guest": "#9AC48A",
+ "idle": "#052B51",
+ "iowait": "#EAB839",
+ "irq": "#BF1B00",
+ "nice": "#C15C17",
+ "softirq": "#E24D42",
+ "steal": "#FCE2DE",
+ "system": "#508642",
+ "user": "#5195CE"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "description": "",
+ "fill": 4,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 12,
+ "w": 12,
+ "x": 0,
+ "y": 21
+ },
+ "hiddenSeries": false,
+ "id": 3,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 250,
+ "sort": null,
+ "sortDesc": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": true,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode=\"system\",instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "interval": "10s",
+ "intervalFactor": 2,
+ "legendFormat": "System - Processes executing in kernel mode",
+ "refId": "A",
+ "step": 20
+ },
+ {
+ "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='user',instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "User - Normal processes executing in user mode",
+ "refId": "B",
+ "step": 240
+ },
+ {
+ "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='nice',instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Nice - Niced processes executing in user mode",
+ "refId": "C",
+ "step": 240
+ },
+ {
+ "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='idle',instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Idle - Waiting for something to happen",
+ "refId": "D",
+ "step": 240
+ },
+ {
+ "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='iowait',instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Iowait - Waiting for I/O to complete",
+ "refId": "E",
+ "step": 240
+ },
+ {
+ "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='irq',instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Irq - Servicing interrupts",
+ "refId": "F",
+ "step": 240
+ },
+ {
+ "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='softirq',instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Softirq - Servicing softirqs",
+ "refId": "G",
+ "step": 240
+ },
+ {
+ "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='steal',instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Steal - Time spent in other operating systems when running in a virtualized environment",
+ "refId": "H",
+ "step": 240
+ },
+ {
+ "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='guest',instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Guest - Time spent running a virtual CPU for a guest operating system",
+ "refId": "I",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "CPU",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "percentage",
+ "logBase": 1,
+ "max": "100",
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Apps": "#629E51",
+ "Buffers": "#614D93",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Free": "#0A437C",
+ "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF",
+ "Inactive": "#584477",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "RAM_Free": "#E0F9D7",
+ "Slab": "#806EB7",
+ "Slab_Cache": "#E0752D",
+ "Swap": "#BF1B00",
+ "Swap - Swap space used": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Swap_Free": "#2F575E",
+ "Unused": "#EAB839",
+ "Unused - Free memory unassigned": "#052B51"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "description": "",
+ "fill": 4,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 12,
+ "w": 12,
+ "x": 12,
+ "y": 21
+ },
+ "hiddenSeries": false,
+ "id": 24,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 350,
+ "sort": null,
+ "sortDesc": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Hardware Corrupted - .*/",
+ "stack": false
+ }
+ ],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Slab_bytes{instance=\"$node\",job=\"$job\"} - node_memory_PageTables_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapCached_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Apps - Memory used by user-space applications",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_memory_PageTables_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "PageTables - Memory used to map between virtual and physical memory addresses",
+ "refId": "B",
+ "step": 240
+ },
+ {
+ "expr": "node_memory_SwapCached_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "SwapCache - Memory that keeps track of pages that have been fetched from swap but not yet been modified",
+ "refId": "C",
+ "step": 240
+ },
+ {
+ "expr": "node_memory_Slab_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Slab - Memory used by the kernel to cache data structures for its own use (caches like inode, dentry, etc)",
+ "refId": "D",
+ "step": 240
+ },
+ {
+ "expr": "node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Cache - Parked file data (file content) cache",
+ "refId": "E",
+ "step": 240
+ },
+ {
+ "expr": "node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Buffers - Block device (e.g. harddisk) cache",
+ "refId": "F",
+ "step": 240
+ },
+ {
+ "expr": "node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Unused - Free memory unassigned",
+ "refId": "G",
+ "step": 240
+ },
+ {
+ "expr": "(node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"})",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Swap - Swap space used",
+ "refId": "H",
+ "step": 240
+ },
+ {
+ "expr": "node_memory_HardwareCorrupted_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working",
+ "refId": "I",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Stack",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "receive_packets_eth0": "#7EB26D",
+ "receive_packets_lo": "#E24D42",
+ "transmit_packets_eth0": "#7EB26D",
+ "transmit_packets_lo": "#E24D42"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 4,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 12,
+ "w": 12,
+ "x": 0,
+ "y": 33
+ },
+ "hiddenSeries": false,
+ "id": 84,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:5871",
+ "alias": "/.*Trans.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_network_receive_bytes_total{instance=\"$node\",job=\"$job\"}[5m])*8",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Receive",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "irate(node_network_transmit_bytes_total{instance=\"$node\",job=\"$job\"}[5m])*8",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Transmit",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Network Traffic",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:5884",
+ "format": "bps",
+ "label": "bits out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:5885",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 3,
+ "description": "",
+ "fill": 4,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 12,
+ "w": 12,
+ "x": 12,
+ "y": 33
+ },
+ "height": "",
+ "hiddenSeries": false,
+ "id": 156,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": false,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'} - node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{mountpoint}}",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Disk Space Used",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 12,
+ "w": 12,
+ "x": 0,
+ "y": 45
+ },
+ "hiddenSeries": false,
+ "id": 229,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Read.*/",
+ "transform": "negative-Y"
+ },
+ {
+ "alias": "/.*sda_.*/",
+ "color": "#7EB26D"
+ },
+ {
+ "alias": "/.*sdb_.*/",
+ "color": "#EAB839"
+ },
+ {
+ "alias": "/.*sdc_.*/",
+ "color": "#6ED0E0"
+ },
+ {
+ "alias": "/.*sdd_.*/",
+ "color": "#EF843C"
+ },
+ {
+ "alias": "/.*sde_.*/",
+ "color": "#E24D42"
+ },
+ {
+ "alias": "/.*sda1.*/",
+ "color": "#584477"
+ },
+ {
+ "alias": "/.*sda2_.*/",
+ "color": "#BA43A9"
+ },
+ {
+ "alias": "/.*sda3_.*/",
+ "color": "#F4D598"
+ },
+ {
+ "alias": "/.*sdb1.*/",
+ "color": "#0A50A1"
+ },
+ {
+ "alias": "/.*sdb2.*/",
+ "color": "#BF1B00"
+ },
+ {
+ "alias": "/.*sdb2.*/",
+ "color": "#BF1B00"
+ },
+ {
+ "alias": "/.*sdb3.*/",
+ "color": "#E0752D"
+ },
+ {
+ "alias": "/.*sdc1.*/",
+ "color": "#962D82"
+ },
+ {
+ "alias": "/.*sdc2.*/",
+ "color": "#614D93"
+ },
+ {
+ "alias": "/.*sdc3.*/",
+ "color": "#9AC48A"
+ },
+ {
+ "alias": "/.*sdd1.*/",
+ "color": "#65C5DB"
+ },
+ {
+ "alias": "/.*sdd2.*/",
+ "color": "#F9934E"
+ },
+ {
+ "alias": "/.*sdd3.*/",
+ "color": "#EA6460"
+ },
+ {
+ "alias": "/.*sde1.*/",
+ "color": "#E0F9D7"
+ },
+ {
+ "alias": "/.*sde2.*/",
+ "color": "#FCEACA"
+ },
+ {
+ "alias": "/.*sde3.*/",
+ "color": "#F9E2D2"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_disk_reads_completed_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[5m])",
+ "intervalFactor": 4,
+ "legendFormat": "{{device}} - Reads completed",
+ "refId": "A",
+ "step": 480
+ },
+ {
+ "expr": "irate(node_disk_writes_completed_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[5m])",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Writes completed",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Disk IOps",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "iops",
+ "label": "IO read (-) / write (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "io time": "#890F02"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 3,
+ "description": "",
+ "fill": 4,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 12,
+ "w": 12,
+ "x": 12,
+ "y": 45
+ },
+ "hiddenSeries": false,
+ "id": 42,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": null,
+ "sortDesc": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*read.*/",
+ "transform": "negative-Y"
+ },
+ {
+ "alias": "/.*sda.*/",
+ "color": "#7EB26D"
+ },
+ {
+ "alias": "/.*sdb.*/",
+ "color": "#EAB839"
+ },
+ {
+ "alias": "/.*sdc.*/",
+ "color": "#6ED0E0"
+ },
+ {
+ "alias": "/.*sdd.*/",
+ "color": "#EF843C"
+ },
+ {
+ "alias": "/.*sde.*/",
+ "color": "#E24D42"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_disk_read_bytes_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[5m])",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Successfully read bytes",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "irate(node_disk_written_bytes_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[5m])",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Successfully written bytes",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "I/O Usage Read / Write",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": false,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes read (-) / write (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "ms",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "io time": "#890F02"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 3,
+ "description": "",
+ "fill": 4,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 12,
+ "w": 12,
+ "x": 0,
+ "y": 57
+ },
+ "hiddenSeries": false,
+ "id": 127,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": null,
+ "sortDesc": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_disk_io_time_seconds_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"} [5m])",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Time spent doing I/Os",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "I/O Usage Times",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": false,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": "time",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "s",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "repeat": null,
+ "title": "CPU / Memory / Net / Disk",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 21
+ },
+ "id": 266,
+ "panels": [
+ {
+ "aliasColors": {
+ "Apps": "#629E51",
+ "Buffers": "#614D93",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Free": "#0A437C",
+ "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF",
+ "Inactive": "#584477",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "RAM_Free": "#E0F9D7",
+ "Slab": "#806EB7",
+ "Slab_Cache": "#E0752D",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Swap_Free": "#2F575E",
+ "Unused": "#EAB839"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 70
+ },
+ "hiddenSeries": false,
+ "id": 136,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 350,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 2,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_Inactive_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Inactive - Memory which has been less recently used. It is more eligible to be reclaimed for other purposes",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_Active_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Active - Memory that has been used more recently and usually not reclaimed unless absolutely necessary",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Active / Inactive",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Apps": "#629E51",
+ "Buffers": "#614D93",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Free": "#0A437C",
+ "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF",
+ "Inactive": "#584477",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "RAM_Free": "#E0F9D7",
+ "Slab": "#806EB7",
+ "Slab_Cache": "#E0752D",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Swap_Free": "#2F575E",
+ "Unused": "#EAB839"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 70
+ },
+ "hiddenSeries": false,
+ "id": 135,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 350,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Committed_AS - *./"
+ },
+ {
+ "alias": "/.*CommitLimit - *./",
+ "color": "#BF1B00",
+ "fill": 0
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_Committed_AS_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Committed_AS - Amount of memory presently allocated on the system",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_CommitLimit_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "CommitLimit - Amount of memory currently available to be allocated on the system",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Committed",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Apps": "#629E51",
+ "Buffers": "#614D93",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Free": "#0A437C",
+ "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF",
+ "Inactive": "#584477",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "RAM_Free": "#E0F9D7",
+ "Slab": "#806EB7",
+ "Slab_Cache": "#E0752D",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Swap_Free": "#2F575E",
+ "Unused": "#EAB839"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 80
+ },
+ "hiddenSeries": false,
+ "id": 191,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 350,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_Inactive_file_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Inactive_file - File-backed memory on inactive LRU list",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_Inactive_anon_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Inactive_anon - Anonymous and swap cache on inactive LRU list, including tmpfs (shmem)",
+ "refId": "B",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_Active_file_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Active_file - File-backed memory on active LRU list",
+ "refId": "C",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_Active_anon_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Active_anon - Anonymous and swap cache on active least-recently-used (LRU) list, including tmpfs",
+ "refId": "D",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Active / Inactive Detail",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Active": "#99440A",
+ "Buffers": "#58140C",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Dirty": "#6ED0E0",
+ "Free": "#B7DBAB",
+ "Inactive": "#EA6460",
+ "Mapped": "#052B51",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "Slab_Cache": "#EAB839",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Total": "#511749",
+ "Total RAM": "#052B51",
+ "Total RAM + Swap": "#052B51",
+ "Total Swap": "#614D93",
+ "VmallocUsed": "#EA6460"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 80
+ },
+ "hiddenSeries": false,
+ "id": 130,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 2,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_Writeback_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Writeback - Memory which is actively being written back to disk",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_WritebackTmp_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "WritebackTmp - Memory used by FUSE for temporary writeback buffers",
+ "refId": "B",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_Dirty_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Dirty - Memory which is waiting to get written back to the disk",
+ "refId": "C",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Writeback and Dirty",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Apps": "#629E51",
+ "Buffers": "#614D93",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Free": "#0A437C",
+ "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF",
+ "Inactive": "#584477",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "RAM_Free": "#E0F9D7",
+ "Slab": "#806EB7",
+ "Slab_Cache": "#E0752D",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Swap_Free": "#2F575E",
+ "Unused": "#EAB839"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 90
+ },
+ "hiddenSeries": false,
+ "id": 138,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 350,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:4131",
+ "alias": "ShmemHugePages - Memory used by shared memory (shmem) and tmpfs allocated with huge pages",
+ "fill": 0
+ },
+ {
+ "$$hashKey": "object:4138",
+ "alias": "ShmemHugePages - Memory used by shared memory (shmem) and tmpfs allocated with huge pages",
+ "fill": 0
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_Mapped_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Mapped - Used memory in mapped pages files which have been mmaped, such as libraries",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_Shmem_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Shmem - Used shared memory (shared between several processes, thus including RAM disks)",
+ "refId": "B",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_ShmemHugePages_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "ShmemHugePages - Memory used by shared memory (shmem) and tmpfs allocated with huge pages",
+ "refId": "C",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_ShmemPmdMapped_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "ShmemPmdMapped - Amount of shared (shmem/tmpfs) memory backed by huge pages",
+ "refId": "D",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Shared and Mapped",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:4106",
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:4107",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Active": "#99440A",
+ "Buffers": "#58140C",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Dirty": "#6ED0E0",
+ "Free": "#B7DBAB",
+ "Inactive": "#EA6460",
+ "Mapped": "#052B51",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "Slab_Cache": "#EAB839",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Total": "#511749",
+ "Total RAM": "#052B51",
+ "Total RAM + Swap": "#052B51",
+ "Total Swap": "#614D93",
+ "VmallocUsed": "#EA6460"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 90
+ },
+ "hiddenSeries": false,
+ "id": 131,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 2,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_SUnreclaim_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "SUnreclaim - Part of Slab, that cannot be reclaimed on memory pressure",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_SReclaimable_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "SReclaimable - Part of Slab, that might be reclaimed, such as caches",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Slab",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Active": "#99440A",
+ "Buffers": "#58140C",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Dirty": "#6ED0E0",
+ "Free": "#B7DBAB",
+ "Inactive": "#EA6460",
+ "Mapped": "#052B51",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "Slab_Cache": "#EAB839",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Total": "#511749",
+ "Total RAM": "#052B51",
+ "Total RAM + Swap": "#052B51",
+ "VmallocUsed": "#EA6460"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 100
+ },
+ "hiddenSeries": false,
+ "id": 70,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_VmallocChunk_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "VmallocChunk - Largest contiguous block of vmalloc area which is free",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_VmallocTotal_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "VmallocTotal - Total size of vmalloc memory area",
+ "refId": "B",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_VmallocUsed_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "VmallocUsed - Amount of vmalloc area which is used",
+ "refId": "C",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Vmalloc",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Apps": "#629E51",
+ "Buffers": "#614D93",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Free": "#0A437C",
+ "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF",
+ "Inactive": "#584477",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "RAM_Free": "#E0F9D7",
+ "Slab": "#806EB7",
+ "Slab_Cache": "#E0752D",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Swap_Free": "#2F575E",
+ "Unused": "#EAB839"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 100
+ },
+ "hiddenSeries": false,
+ "id": 159,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 350,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_Bounce_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Bounce - Memory used for block device bounce buffers",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Bounce",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Active": "#99440A",
+ "Buffers": "#58140C",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Dirty": "#6ED0E0",
+ "Free": "#B7DBAB",
+ "Inactive": "#EA6460",
+ "Mapped": "#052B51",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "Slab_Cache": "#EAB839",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Total": "#511749",
+ "Total RAM": "#052B51",
+ "Total RAM + Swap": "#052B51",
+ "VmallocUsed": "#EA6460"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 110
+ },
+ "hiddenSeries": false,
+ "id": 129,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Inactive *./",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_AnonHugePages_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "AnonHugePages - Memory in anonymous huge pages",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_AnonPages_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "AnonPages - Memory in user pages not backed by files",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Anonymous",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Apps": "#629E51",
+ "Buffers": "#614D93",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Free": "#0A437C",
+ "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF",
+ "Inactive": "#584477",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "RAM_Free": "#E0F9D7",
+ "Slab": "#806EB7",
+ "Slab_Cache": "#E0752D",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Swap_Free": "#2F575E",
+ "Unused": "#EAB839"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 110
+ },
+ "hiddenSeries": false,
+ "id": 160,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 350,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 2,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_KernelStack_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "KernelStack - Kernel memory stack. This is not reclaimable",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_Percpu_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "PerCPU - Per CPU memory allocated dynamically by loadable modules",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Kernel / CPU",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Active": "#99440A",
+ "Buffers": "#58140C",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Dirty": "#6ED0E0",
+ "Free": "#B7DBAB",
+ "Inactive": "#EA6460",
+ "Mapped": "#052B51",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "Slab_Cache": "#EAB839",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Total": "#511749",
+ "Total RAM": "#806EB7",
+ "Total RAM + Swap": "#806EB7",
+ "VmallocUsed": "#EA6460"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 120
+ },
+ "hiddenSeries": false,
+ "id": 140,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_HugePages_Free{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "HugePages_Free - Huge pages in the pool that are not yet allocated",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_HugePages_Rsvd{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "HugePages_Rsvd - Huge pages for which a commitment to allocate from the pool has been made, but no allocation has yet been made",
+ "refId": "B",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_HugePages_Surp{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "HugePages_Surp - Huge pages in the pool above the value in /proc/sys/vm/nr_hugepages",
+ "refId": "C",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory HugePages Counter",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "pages",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Active": "#99440A",
+ "Buffers": "#58140C",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Dirty": "#6ED0E0",
+ "Free": "#B7DBAB",
+ "Inactive": "#EA6460",
+ "Mapped": "#052B51",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "Slab_Cache": "#EAB839",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Total": "#511749",
+ "Total RAM": "#806EB7",
+ "Total RAM + Swap": "#806EB7",
+ "VmallocUsed": "#EA6460"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 120
+ },
+ "hiddenSeries": false,
+ "id": 71,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 2,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_HugePages_Total{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "HugePages - Total size of the pool of huge pages",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_Hugepagesize_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Hugepagesize - Huge Page size",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory HugePages Size",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Active": "#99440A",
+ "Buffers": "#58140C",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Dirty": "#6ED0E0",
+ "Free": "#B7DBAB",
+ "Inactive": "#EA6460",
+ "Mapped": "#052B51",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "Slab_Cache": "#EAB839",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Total": "#511749",
+ "Total RAM": "#052B51",
+ "Total RAM + Swap": "#052B51",
+ "VmallocUsed": "#EA6460"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 130
+ },
+ "hiddenSeries": false,
+ "id": 128,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_DirectMap1G_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "DirectMap1G - Amount of pages mapped as this size",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_DirectMap2M_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "DirectMap2M - Amount of pages mapped as this size",
+ "refId": "B",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_DirectMap4k_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "DirectMap4K - Amount of pages mapped as this size",
+ "refId": "C",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory DirectMap",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Apps": "#629E51",
+ "Buffers": "#614D93",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Free": "#0A437C",
+ "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF",
+ "Inactive": "#584477",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "RAM_Free": "#E0F9D7",
+ "Slab": "#806EB7",
+ "Slab_Cache": "#E0752D",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Swap_Free": "#2F575E",
+ "Unused": "#EAB839"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 130
+ },
+ "hiddenSeries": false,
+ "id": 137,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 350,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_Unevictable_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Unevictable - Amount of unevictable memory that can't be swapped out for a variety of reasons",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_Mlocked_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "MLocked - Size of pages locked to memory using the mlock() system call",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Unevictable and MLocked",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Active": "#99440A",
+ "Buffers": "#58140C",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Dirty": "#6ED0E0",
+ "Free": "#B7DBAB",
+ "Inactive": "#EA6460",
+ "Mapped": "#052B51",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "Slab_Cache": "#EAB839",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Total": "#511749",
+ "Total RAM": "#052B51",
+ "Total RAM + Swap": "#052B51",
+ "Total Swap": "#614D93",
+ "VmallocUsed": "#EA6460"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 140
+ },
+ "hiddenSeries": false,
+ "id": 132,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_NFS_Unstable_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "NFS Unstable - Memory in NFS pages sent to the server, but not yet committed to the storage",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory NFS",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "repeat": null,
+ "title": "Memory Meminfo",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 22
+ },
+ "id": 267,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 23
+ },
+ "hiddenSeries": false,
+ "id": 176,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*out/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_vmstat_pgpgin{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Pagesin - Page in operations",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_vmstat_pgpgout{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Pagesout - Page out operations",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Pages In / Out",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "pages out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 23
+ },
+ "hiddenSeries": false,
+ "id": 22,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*out/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_vmstat_pswpin{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Pswpin - Pages swapped in",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_vmstat_pswpout{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Pswpout - Pages swapped out",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Pages Swap In / Out",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "pages out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Apps": "#629E51",
+ "Buffers": "#614D93",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Free": "#0A437C",
+ "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF",
+ "Inactive": "#584477",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "RAM_Free": "#E0F9D7",
+ "Slab": "#806EB7",
+ "Slab_Cache": "#E0752D",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Swap_Free": "#2F575E",
+ "Unused": "#EAB839"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 33
+ },
+ "hiddenSeries": false,
+ "id": 175,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 350,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:6118",
+ "alias": "Pgfault - Page major and minor fault operations",
+ "fill": 0,
+ "stack": false
+ }
+ ],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_vmstat_pgfault{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Pgfault - Page major and minor fault operations",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_vmstat_pgmajfault{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Pgmajfault - Major page fault operations",
+ "refId": "B",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_vmstat_pgfault{instance=\"$node\",job=\"$job\"}[5m]) - irate(node_vmstat_pgmajfault{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Pgminfault - Minor page fault operations",
+ "refId": "C",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Page Faults",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:6133",
+ "format": "short",
+ "label": "faults",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:6134",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Active": "#99440A",
+ "Buffers": "#58140C",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Dirty": "#6ED0E0",
+ "Free": "#B7DBAB",
+ "Inactive": "#EA6460",
+ "Mapped": "#052B51",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "Slab_Cache": "#EAB839",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Total": "#511749",
+ "Total RAM": "#052B51",
+ "Total RAM + Swap": "#052B51",
+ "Total Swap": "#614D93",
+ "VmallocUsed": "#EA6460"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 33
+ },
+ "hiddenSeries": false,
+ "id": 307,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_vmstat_oom_kill{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "oom killer invocations",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "OOM Killer",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:5373",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:5374",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "repeat": null,
+ "title": "Memory Vmstat",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 23
+ },
+ "id": 293,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 24
+ },
+ "hiddenSeries": false,
+ "id": 260,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Variation*./",
+ "color": "#890F02"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_timex_estimated_error_seconds{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Estimated error in seconds",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_timex_offset_seconds{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Time offset between local system and reference clock",
+ "refId": "B",
+ "step": 240
+ },
+ {
+ "expr": "node_timex_maxerror_seconds{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Maximum error in seconds",
+ "refId": "C",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Time Synchronized Drift",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": "seconds",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 24
+ },
+ "hiddenSeries": false,
+ "id": 291,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_timex_loop_time_constant{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Phase-locked loop time adjust",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Time PLL Adjust",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 34
+ },
+ "hiddenSeries": false,
+ "id": 168,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Variation*./",
+ "color": "#890F02"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_timex_sync_status{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Is clock synchronized to a reliable server (1 = yes, 0 = no)",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_timex_frequency_adjustment_ratio{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Local clock frequency adjustment",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Time Synchronized Status",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 34
+ },
+ "hiddenSeries": false,
+ "id": 294,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_timex_tick_seconds{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Seconds between clock ticks",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_timex_tai_offset_seconds{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "International Atomic Time (TAI) offset",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Time Misc",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": "seconds",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "title": "System Timesync",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 24
+ },
+ "id": 312,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 7
+ },
+ "hiddenSeries": false,
+ "id": 62,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_procs_blocked{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Processes blocked waiting for I/O to complete",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_procs_running{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Processes in runnable state",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Processes Status",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:6500",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:6501",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 7
+ },
+ "hiddenSeries": false,
+ "id": 315,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_processes_state{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{ state }}",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Processes State",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:6500",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:6501",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 17
+ },
+ "hiddenSeries": false,
+ "id": 148,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_forks_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Process forks per second",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Processes Forks",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:6640",
+ "format": "short",
+ "label": "forks / sec",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:6641",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 17
+ },
+ "hiddenSeries": false,
+ "id": 149,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Max.*/",
+ "fill": 0
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(process_virtual_memory_bytes{instance=\"$node\",job=\"$job\"}[5m])",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Processes virtual memory size in bytes",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "process_resident_memory_max_bytes{instance=\"$node\",job=\"$job\"}",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Maximum resident memory size in bytes",
+ "refId": "B",
+ "step": 240
+ },
+ {
+ "expr": "irate(process_virtual_memory_bytes{instance=\"$node\",job=\"$job\"}[5m])",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Processes virtual memory size in bytes",
+ "refId": "C",
+ "step": 240
+ },
+ {
+ "expr": "irate(process_virtual_memory_max_bytes{instance=\"$node\",job=\"$job\"}[5m])",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Maximum amount of virtual memory available in bytes",
+ "refId": "D",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Processes Memory",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "decbytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 27
+ },
+ "hiddenSeries": false,
+ "id": 313,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:709",
+ "alias": "PIDs limit",
+ "color": "#F2495C",
+ "fill": 0
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_processes_pids{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Number of PIDs",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_processes_max_processes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "PIDs limit",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "PIDs Number and Limit",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:6500",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:6501",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 27
+ },
+ "hiddenSeries": false,
+ "id": 305,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:4963",
+ "alias": "/.*waiting.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_schedstat_running_seconds_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "CPU {{ cpu }} - seconds spent running a process",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "irate(node_schedstat_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "CPU {{ cpu }} - seconds spent by processes waiting for this CPU",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Process schedule stats Running / Waiting",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:4860",
+ "format": "s",
+ "label": "seconds",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:4861",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 37
+ },
+ "hiddenSeries": false,
+ "id": 314,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:709",
+ "alias": "Threads limit",
+ "color": "#F2495C",
+ "fill": 0
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_processes_threads{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Allocated threads",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_processes_max_threads{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Threads limit",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Threads Number and Limit",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:6500",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:6501",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "title": "System Processes",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 25
+ },
+ "id": 269,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 8
+ },
+ "hiddenSeries": false,
+ "id": 8,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_context_switches_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Context switches",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "irate(node_intr_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Interrupts",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Context Switches / Interrupts",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 8
+ },
+ "hiddenSeries": false,
+ "id": 7,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_load1{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 4,
+ "legendFormat": "Load 1m",
+ "refId": "A",
+ "step": 480
+ },
+ {
+ "expr": "node_load5{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 4,
+ "legendFormat": "Load 5m",
+ "refId": "B",
+ "step": 480
+ },
+ {
+ "expr": "node_load15{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 4,
+ "legendFormat": "Load 15m",
+ "refId": "C",
+ "step": 480
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "System Load",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:6261",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:6262",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 18
+ },
+ "hiddenSeries": false,
+ "id": 259,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Critical.*/",
+ "color": "#E24D42",
+ "fill": 0
+ },
+ {
+ "alias": "/.*Max.*/",
+ "color": "#EF843C",
+ "fill": 0
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_interrupts_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{ type }} - {{ info }}",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Interrupts Detail",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 18
+ },
+ "hiddenSeries": false,
+ "id": 306,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_schedstat_timeslices_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "CPU {{ cpu }}",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Schedule timeslices executed by each cpu",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:4860",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:4861",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 28
+ },
+ "hiddenSeries": false,
+ "id": 151,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_entropy_available_bits{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Entropy available to random number generators",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Entropy",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:6568",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:6569",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 28
+ },
+ "hiddenSeries": false,
+ "id": 308,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(process_cpu_seconds_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Time spent",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "CPU time spent in user and system contexts",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:4860",
+ "format": "s",
+ "label": "seconds",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:4861",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 38
+ },
+ "hiddenSeries": false,
+ "id": 64,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:6323",
+ "alias": "/.*Max.*/",
+ "color": "#890F02",
+ "fill": 0
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "process_max_fds{instance=\"$node\",job=\"$job\"}",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Maximum open file descriptors",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "process_open_fds{instance=\"$node\",job=\"$job\"}",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Open file descriptors",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "File Descriptors",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:6338",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:6339",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "repeat": null,
+ "title": "System Misc",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 26
+ },
+ "id": 304,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 26
+ },
+ "hiddenSeries": false,
+ "id": 158,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:6726",
+ "alias": "/.*Critical.*/",
+ "color": "#E24D42",
+ "fill": 0
+ },
+ {
+ "$$hashKey": "object:6727",
+ "alias": "/.*Max.*/",
+ "color": "#EF843C",
+ "fill": 0
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_hwmon_temp_celsius{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{ chip }} {{ sensor }} temp",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_hwmon_temp_crit_alarm_celsius{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": true,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{ chip }} {{ sensor }} Critical Alarm",
+ "refId": "B",
+ "step": 240
+ },
+ {
+ "expr": "node_hwmon_temp_crit_celsius{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{ chip }} {{ sensor }} Critical",
+ "refId": "C",
+ "step": 240
+ },
+ {
+ "expr": "node_hwmon_temp_crit_hyst_celsius{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": true,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{ chip }} {{ sensor }} Critical Hysteresis",
+ "refId": "D",
+ "step": 240
+ },
+ {
+ "expr": "node_hwmon_temp_max_celsius{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": true,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{ chip }} {{ sensor }} Max",
+ "refId": "E",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Hardware temperature monitor",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:6750",
+ "format": "celsius",
+ "label": "temperature",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:6751",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 26
+ },
+ "hiddenSeries": false,
+ "id": 300,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:1655",
+ "alias": "/.*Max.*/",
+ "color": "#EF843C",
+ "fill": 0
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_cooling_device_cur_state{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Current {{ name }} in {{ type }}",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_cooling_device_max_state{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Max {{ name }} in {{ type }}",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Throttle cooling device",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:1678",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:1679",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 36
+ },
+ "hiddenSeries": false,
+ "id": 302,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_power_supply_online{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{ power_supply }} online",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Power supply",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:1678",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:1679",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "title": "Hardware Misc",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 27
+ },
+ "id": 296,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 10
+ },
+ "hiddenSeries": false,
+ "id": 297,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_systemd_socket_accepted_connections_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{ name }} Connections",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Systemd Sockets",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 10
+ },
+ "hiddenSeries": false,
+ "id": 298,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "Failed",
+ "color": "#F2495C"
+ },
+ {
+ "alias": "Inactive",
+ "color": "#FF9830"
+ },
+ {
+ "alias": "Active",
+ "color": "#73BF69"
+ },
+ {
+ "alias": "Deactivating",
+ "color": "#FFCB7D"
+ },
+ {
+ "alias": "Activating",
+ "color": "#C8F2C2"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"activating\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Activating",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"active\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Active",
+ "refId": "B",
+ "step": 240
+ },
+ {
+ "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"deactivating\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Deactivating",
+ "refId": "C",
+ "step": 240
+ },
+ {
+ "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"failed\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Failed",
+ "refId": "D",
+ "step": 240
+ },
+ {
+ "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"inactive\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Inactive",
+ "refId": "E",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Systemd Units State",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "title": "Systemd",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 28
+ },
+ "id": 270,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 29
+ },
+ "hiddenSeries": false,
+ "id": 9,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:2033",
+ "alias": "/.*Read.*/",
+ "transform": "negative-Y"
+ },
+ {
+ "$$hashKey": "object:2034",
+ "alias": "/.*sda_.*/",
+ "color": "#7EB26D"
+ },
+ {
+ "$$hashKey": "object:2035",
+ "alias": "/.*sdb_.*/",
+ "color": "#EAB839"
+ },
+ {
+ "$$hashKey": "object:2036",
+ "alias": "/.*sdc_.*/",
+ "color": "#6ED0E0"
+ },
+ {
+ "$$hashKey": "object:2037",
+ "alias": "/.*sdd_.*/",
+ "color": "#EF843C"
+ },
+ {
+ "$$hashKey": "object:2038",
+ "alias": "/.*sde_.*/",
+ "color": "#E24D42"
+ },
+ {
+ "$$hashKey": "object:2039",
+ "alias": "/.*sda1.*/",
+ "color": "#584477"
+ },
+ {
+ "$$hashKey": "object:2040",
+ "alias": "/.*sda2_.*/",
+ "color": "#BA43A9"
+ },
+ {
+ "$$hashKey": "object:2041",
+ "alias": "/.*sda3_.*/",
+ "color": "#F4D598"
+ },
+ {
+ "$$hashKey": "object:2042",
+ "alias": "/.*sdb1.*/",
+ "color": "#0A50A1"
+ },
+ {
+ "$$hashKey": "object:2043",
+ "alias": "/.*sdb2.*/",
+ "color": "#BF1B00"
+ },
+ {
+ "$$hashKey": "object:2044",
+ "alias": "/.*sdb3.*/",
+ "color": "#E0752D"
+ },
+ {
+ "$$hashKey": "object:2045",
+ "alias": "/.*sdc1.*/",
+ "color": "#962D82"
+ },
+ {
+ "$$hashKey": "object:2046",
+ "alias": "/.*sdc2.*/",
+ "color": "#614D93"
+ },
+ {
+ "$$hashKey": "object:2047",
+ "alias": "/.*sdc3.*/",
+ "color": "#9AC48A"
+ },
+ {
+ "$$hashKey": "object:2048",
+ "alias": "/.*sdd1.*/",
+ "color": "#65C5DB"
+ },
+ {
+ "$$hashKey": "object:2049",
+ "alias": "/.*sdd2.*/",
+ "color": "#F9934E"
+ },
+ {
+ "$$hashKey": "object:2050",
+ "alias": "/.*sdd3.*/",
+ "color": "#EA6460"
+ },
+ {
+ "$$hashKey": "object:2051",
+ "alias": "/.*sde1.*/",
+ "color": "#E0F9D7"
+ },
+ {
+ "$$hashKey": "object:2052",
+ "alias": "/.*sde2.*/",
+ "color": "#FCEACA"
+ },
+ {
+ "$$hashKey": "object:2053",
+ "alias": "/.*sde3.*/",
+ "color": "#F9E2D2"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_disk_reads_completed_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "intervalFactor": 4,
+ "legendFormat": "{{device}} - Reads completed",
+ "refId": "A",
+ "step": 8
+ },
+ {
+ "expr": "irate(node_disk_writes_completed_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Writes completed",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Disk IOps Completed",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:2186",
+ "format": "iops",
+ "label": "IO read (-) / write (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:2187",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 29
+ },
+ "hiddenSeries": false,
+ "id": 33,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Read.*/",
+ "transform": "negative-Y"
+ },
+ {
+ "alias": "/.*sda_.*/",
+ "color": "#7EB26D"
+ },
+ {
+ "alias": "/.*sdb_.*/",
+ "color": "#EAB839"
+ },
+ {
+ "alias": "/.*sdc_.*/",
+ "color": "#6ED0E0"
+ },
+ {
+ "alias": "/.*sdd_.*/",
+ "color": "#EF843C"
+ },
+ {
+ "alias": "/.*sde_.*/",
+ "color": "#E24D42"
+ },
+ {
+ "alias": "/.*sda1.*/",
+ "color": "#584477"
+ },
+ {
+ "alias": "/.*sda2_.*/",
+ "color": "#BA43A9"
+ },
+ {
+ "alias": "/.*sda3_.*/",
+ "color": "#F4D598"
+ },
+ {
+ "alias": "/.*sdb1.*/",
+ "color": "#0A50A1"
+ },
+ {
+ "alias": "/.*sdb2.*/",
+ "color": "#BF1B00"
+ },
+ {
+ "alias": "/.*sdb3.*/",
+ "color": "#E0752D"
+ },
+ {
+ "alias": "/.*sdc1.*/",
+ "color": "#962D82"
+ },
+ {
+ "alias": "/.*sdc2.*/",
+ "color": "#614D93"
+ },
+ {
+ "alias": "/.*sdc3.*/",
+ "color": "#9AC48A"
+ },
+ {
+ "alias": "/.*sdd1.*/",
+ "color": "#65C5DB"
+ },
+ {
+ "alias": "/.*sdd2.*/",
+ "color": "#F9934E"
+ },
+ {
+ "alias": "/.*sdd3.*/",
+ "color": "#EA6460"
+ },
+ {
+ "alias": "/.*sde1.*/",
+ "color": "#E0F9D7"
+ },
+ {
+ "alias": "/.*sde2.*/",
+ "color": "#FCEACA"
+ },
+ {
+ "alias": "/.*sde3.*/",
+ "color": "#F9E2D2"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_disk_read_bytes_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 4,
+ "legendFormat": "{{device}} - Read bytes",
+ "refId": "A",
+ "step": 8
+ },
+ {
+ "expr": "irate(node_disk_written_bytes_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Written bytes",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Disk R/W Data",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": "bytes read (-) / write (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 3,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 39
+ },
+ "hiddenSeries": false,
+ "id": 37,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Read.*/",
+ "transform": "negative-Y"
+ },
+ {
+ "alias": "/.*sda_.*/",
+ "color": "#7EB26D"
+ },
+ {
+ "alias": "/.*sdb_.*/",
+ "color": "#EAB839"
+ },
+ {
+ "alias": "/.*sdc_.*/",
+ "color": "#6ED0E0"
+ },
+ {
+ "alias": "/.*sdd_.*/",
+ "color": "#EF843C"
+ },
+ {
+ "alias": "/.*sde_.*/",
+ "color": "#E24D42"
+ },
+ {
+ "alias": "/.*sda1.*/",
+ "color": "#584477"
+ },
+ {
+ "alias": "/.*sda2_.*/",
+ "color": "#BA43A9"
+ },
+ {
+ "alias": "/.*sda3_.*/",
+ "color": "#F4D598"
+ },
+ {
+ "alias": "/.*sdb1.*/",
+ "color": "#0A50A1"
+ },
+ {
+ "alias": "/.*sdb2.*/",
+ "color": "#BF1B00"
+ },
+ {
+ "alias": "/.*sdb3.*/",
+ "color": "#E0752D"
+ },
+ {
+ "alias": "/.*sdc1.*/",
+ "color": "#962D82"
+ },
+ {
+ "alias": "/.*sdc2.*/",
+ "color": "#614D93"
+ },
+ {
+ "alias": "/.*sdc3.*/",
+ "color": "#9AC48A"
+ },
+ {
+ "alias": "/.*sdd1.*/",
+ "color": "#65C5DB"
+ },
+ {
+ "alias": "/.*sdd2.*/",
+ "color": "#F9934E"
+ },
+ {
+ "alias": "/.*sdd3.*/",
+ "color": "#EA6460"
+ },
+ {
+ "alias": "/.*sde1.*/",
+ "color": "#E0F9D7"
+ },
+ {
+ "alias": "/.*sde2.*/",
+ "color": "#FCEACA"
+ },
+ {
+ "alias": "/.*sde3.*/",
+ "color": "#F9E2D2"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_disk_read_time_seconds_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "hide": false,
+ "intervalFactor": 4,
+ "legendFormat": "{{device}} - Read time",
+ "refId": "A",
+ "step": 8
+ },
+ {
+ "expr": "irate(node_disk_write_time_seconds_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Write time",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Disk R/W Time",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": "time. read (-) / write (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 39
+ },
+ "hiddenSeries": false,
+ "id": 35,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*sda_.*/",
+ "color": "#7EB26D"
+ },
+ {
+ "alias": "/.*sdb_.*/",
+ "color": "#EAB839"
+ },
+ {
+ "alias": "/.*sdc_.*/",
+ "color": "#6ED0E0"
+ },
+ {
+ "alias": "/.*sdd_.*/",
+ "color": "#EF843C"
+ },
+ {
+ "alias": "/.*sde_.*/",
+ "color": "#E24D42"
+ },
+ {
+ "alias": "/.*sda1.*/",
+ "color": "#584477"
+ },
+ {
+ "alias": "/.*sda2_.*/",
+ "color": "#BA43A9"
+ },
+ {
+ "alias": "/.*sda3_.*/",
+ "color": "#F4D598"
+ },
+ {
+ "alias": "/.*sdb1.*/",
+ "color": "#0A50A1"
+ },
+ {
+ "alias": "/.*sdb2.*/",
+ "color": "#BF1B00"
+ },
+ {
+ "alias": "/.*sdb3.*/",
+ "color": "#E0752D"
+ },
+ {
+ "alias": "/.*sdc1.*/",
+ "color": "#962D82"
+ },
+ {
+ "alias": "/.*sdc2.*/",
+ "color": "#614D93"
+ },
+ {
+ "alias": "/.*sdc3.*/",
+ "color": "#9AC48A"
+ },
+ {
+ "alias": "/.*sdd1.*/",
+ "color": "#65C5DB"
+ },
+ {
+ "alias": "/.*sdd2.*/",
+ "color": "#F9934E"
+ },
+ {
+ "alias": "/.*sdd3.*/",
+ "color": "#EA6460"
+ },
+ {
+ "alias": "/.*sde1.*/",
+ "color": "#E0F9D7"
+ },
+ {
+ "alias": "/.*sde2.*/",
+ "color": "#FCEACA"
+ },
+ {
+ "alias": "/.*sde3.*/",
+ "color": "#F9E2D2"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_disk_io_time_weighted_seconds_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "intervalFactor": 4,
+ "legendFormat": "{{device}} - IO time weighted",
+ "refId": "A",
+ "step": 8
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Disk IOs Weighted",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": "time",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 49
+ },
+ "hiddenSeries": false,
+ "id": 133,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Read.*/",
+ "transform": "negative-Y"
+ },
+ {
+ "alias": "/.*sda_.*/",
+ "color": "#7EB26D"
+ },
+ {
+ "alias": "/.*sdb_.*/",
+ "color": "#EAB839"
+ },
+ {
+ "alias": "/.*sdc_.*/",
+ "color": "#6ED0E0"
+ },
+ {
+ "alias": "/.*sdd_.*/",
+ "color": "#EF843C"
+ },
+ {
+ "alias": "/.*sde_.*/",
+ "color": "#E24D42"
+ },
+ {
+ "alias": "/.*sda1.*/",
+ "color": "#584477"
+ },
+ {
+ "alias": "/.*sda2_.*/",
+ "color": "#BA43A9"
+ },
+ {
+ "alias": "/.*sda3_.*/",
+ "color": "#F4D598"
+ },
+ {
+ "alias": "/.*sdb1.*/",
+ "color": "#0A50A1"
+ },
+ {
+ "alias": "/.*sdb2.*/",
+ "color": "#BF1B00"
+ },
+ {
+ "alias": "/.*sdb3.*/",
+ "color": "#E0752D"
+ },
+ {
+ "alias": "/.*sdc1.*/",
+ "color": "#962D82"
+ },
+ {
+ "alias": "/.*sdc2.*/",
+ "color": "#614D93"
+ },
+ {
+ "alias": "/.*sdc3.*/",
+ "color": "#9AC48A"
+ },
+ {
+ "alias": "/.*sdd1.*/",
+ "color": "#65C5DB"
+ },
+ {
+ "alias": "/.*sdd2.*/",
+ "color": "#F9934E"
+ },
+ {
+ "alias": "/.*sdd3.*/",
+ "color": "#EA6460"
+ },
+ {
+ "alias": "/.*sde1.*/",
+ "color": "#E0F9D7"
+ },
+ {
+ "alias": "/.*sde2.*/",
+ "color": "#FCEACA"
+ },
+ {
+ "alias": "/.*sde3.*/",
+ "color": "#F9E2D2"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_disk_reads_merged_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Read merged",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_disk_writes_merged_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Write merged",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Disk R/W Merged",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "iops",
+ "label": "I/Os",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 3,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 49
+ },
+ "hiddenSeries": false,
+ "id": 36,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*sda_.*/",
+ "color": "#7EB26D"
+ },
+ {
+ "alias": "/.*sdb_.*/",
+ "color": "#EAB839"
+ },
+ {
+ "alias": "/.*sdc_.*/",
+ "color": "#6ED0E0"
+ },
+ {
+ "alias": "/.*sdd_.*/",
+ "color": "#EF843C"
+ },
+ {
+ "alias": "/.*sde_.*/",
+ "color": "#E24D42"
+ },
+ {
+ "alias": "/.*sda1.*/",
+ "color": "#584477"
+ },
+ {
+ "alias": "/.*sda2_.*/",
+ "color": "#BA43A9"
+ },
+ {
+ "alias": "/.*sda3_.*/",
+ "color": "#F4D598"
+ },
+ {
+ "alias": "/.*sdb1.*/",
+ "color": "#0A50A1"
+ },
+ {
+ "alias": "/.*sdb2.*/",
+ "color": "#BF1B00"
+ },
+ {
+ "alias": "/.*sdb3.*/",
+ "color": "#E0752D"
+ },
+ {
+ "alias": "/.*sdc1.*/",
+ "color": "#962D82"
+ },
+ {
+ "alias": "/.*sdc2.*/",
+ "color": "#614D93"
+ },
+ {
+ "alias": "/.*sdc3.*/",
+ "color": "#9AC48A"
+ },
+ {
+ "alias": "/.*sdd1.*/",
+ "color": "#65C5DB"
+ },
+ {
+ "alias": "/.*sdd2.*/",
+ "color": "#F9934E"
+ },
+ {
+ "alias": "/.*sdd3.*/",
+ "color": "#EA6460"
+ },
+ {
+ "alias": "/.*sde1.*/",
+ "color": "#E0F9D7"
+ },
+ {
+ "alias": "/.*sde2.*/",
+ "color": "#FCEACA"
+ },
+ {
+ "alias": "/.*sde3.*/",
+ "color": "#F9E2D2"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_disk_io_time_seconds_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "intervalFactor": 4,
+ "legendFormat": "{{device}} - IO time",
+ "refId": "A",
+ "step": 8
+ },
+ {
+ "expr": "irate(node_disk_discard_time_seconds_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "interval": "",
+ "intervalFactor": 4,
+ "legendFormat": "{{device}} - discard time",
+ "refId": "B",
+ "step": 8
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Time Spent Doing I/Os",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": "time",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 59
+ },
+ "hiddenSeries": false,
+ "id": 34,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*sda_.*/",
+ "color": "#7EB26D"
+ },
+ {
+ "alias": "/.*sdb_.*/",
+ "color": "#EAB839"
+ },
+ {
+ "alias": "/.*sdc_.*/",
+ "color": "#6ED0E0"
+ },
+ {
+ "alias": "/.*sdd_.*/",
+ "color": "#EF843C"
+ },
+ {
+ "alias": "/.*sde_.*/",
+ "color": "#E24D42"
+ },
+ {
+ "alias": "/.*sda1.*/",
+ "color": "#584477"
+ },
+ {
+ "alias": "/.*sda2_.*/",
+ "color": "#BA43A9"
+ },
+ {
+ "alias": "/.*sda3_.*/",
+ "color": "#F4D598"
+ },
+ {
+ "alias": "/.*sdb1.*/",
+ "color": "#0A50A1"
+ },
+ {
+ "alias": "/.*sdb2.*/",
+ "color": "#BF1B00"
+ },
+ {
+ "alias": "/.*sdb3.*/",
+ "color": "#E0752D"
+ },
+ {
+ "alias": "/.*sdc1.*/",
+ "color": "#962D82"
+ },
+ {
+ "alias": "/.*sdc2.*/",
+ "color": "#614D93"
+ },
+ {
+ "alias": "/.*sdc3.*/",
+ "color": "#9AC48A"
+ },
+ {
+ "alias": "/.*sdd1.*/",
+ "color": "#65C5DB"
+ },
+ {
+ "alias": "/.*sdd2.*/",
+ "color": "#F9934E"
+ },
+ {
+ "alias": "/.*sdd3.*/",
+ "color": "#EA6460"
+ },
+ {
+ "alias": "/.*sde1.*/",
+ "color": "#E0F9D7"
+ },
+ {
+ "alias": "/.*sde2.*/",
+ "color": "#FCEACA"
+ },
+ {
+ "alias": "/.*sde3.*/",
+ "color": "#F9E2D2"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_disk_io_now{instance=\"$node\",job=\"$job\"}",
+ "intervalFactor": 4,
+ "legendFormat": "{{device}} - IO now",
+ "refId": "A",
+ "step": 8
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Disk IOs Current in Progress",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "iops",
+ "label": "I/Os",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 59
+ },
+ "hiddenSeries": false,
+ "id": 301,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null as zero",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:2034",
+ "alias": "/.*sda_.*/",
+ "color": "#7EB26D"
+ },
+ {
+ "$$hashKey": "object:2035",
+ "alias": "/.*sdb_.*/",
+ "color": "#EAB839"
+ },
+ {
+ "$$hashKey": "object:2036",
+ "alias": "/.*sdc_.*/",
+ "color": "#6ED0E0"
+ },
+ {
+ "$$hashKey": "object:2037",
+ "alias": "/.*sdd_.*/",
+ "color": "#EF843C"
+ },
+ {
+ "$$hashKey": "object:2038",
+ "alias": "/.*sde_.*/",
+ "color": "#E24D42"
+ },
+ {
+ "$$hashKey": "object:2039",
+ "alias": "/.*sda1.*/",
+ "color": "#584477"
+ },
+ {
+ "$$hashKey": "object:2040",
+ "alias": "/.*sda2_.*/",
+ "color": "#BA43A9"
+ },
+ {
+ "$$hashKey": "object:2041",
+ "alias": "/.*sda3_.*/",
+ "color": "#F4D598"
+ },
+ {
+ "$$hashKey": "object:2042",
+ "alias": "/.*sdb1.*/",
+ "color": "#0A50A1"
+ },
+ {
+ "$$hashKey": "object:2043",
+ "alias": "/.*sdb2.*/",
+ "color": "#BF1B00"
+ },
+ {
+ "$$hashKey": "object:2044",
+ "alias": "/.*sdb3.*/",
+ "color": "#E0752D"
+ },
+ {
+ "$$hashKey": "object:2045",
+ "alias": "/.*sdc1.*/",
+ "color": "#962D82"
+ },
+ {
+ "$$hashKey": "object:2046",
+ "alias": "/.*sdc2.*/",
+ "color": "#614D93"
+ },
+ {
+ "$$hashKey": "object:2047",
+ "alias": "/.*sdc3.*/",
+ "color": "#9AC48A"
+ },
+ {
+ "$$hashKey": "object:2048",
+ "alias": "/.*sdd1.*/",
+ "color": "#65C5DB"
+ },
+ {
+ "$$hashKey": "object:2049",
+ "alias": "/.*sdd2.*/",
+ "color": "#F9934E"
+ },
+ {
+ "$$hashKey": "object:2050",
+ "alias": "/.*sdd3.*/",
+ "color": "#EA6460"
+ },
+ {
+ "$$hashKey": "object:2051",
+ "alias": "/.*sde1.*/",
+ "color": "#E0F9D7"
+ },
+ {
+ "$$hashKey": "object:2052",
+ "alias": "/.*sde2.*/",
+ "color": "#FCEACA"
+ },
+ {
+ "$$hashKey": "object:2053",
+ "alias": "/.*sde3.*/",
+ "color": "#F9E2D2"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_disk_discards_completed_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "interval": "",
+ "intervalFactor": 4,
+ "legendFormat": "{{device}} - Discards completed",
+ "refId": "A",
+ "step": 8
+ },
+ {
+ "expr": "irate(node_disk_discards_merged_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Discards merged",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Disk IOps Discards completed / merged",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:2186",
+ "format": "iops",
+ "label": "IOs",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:2187",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "repeat": null,
+ "title": "Storage Disk",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 29
+ },
+ "id": 271,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 3,
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 78
+ },
+ "hiddenSeries": false,
+ "id": 43,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{mountpoint}} - Available",
+ "metric": "",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_filesystem_free_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{mountpoint}} - Free",
+ "refId": "B",
+ "step": 2
+ },
+ {
+ "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{mountpoint}} - Size",
+ "refId": "C",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Filesystem space available",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:3826",
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:3827",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 78
+ },
+ "hiddenSeries": false,
+ "id": 41,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_filesystem_files_free{instance=\"$node\",job=\"$job\",device!~'rootfs'}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{mountpoint}} - Free file nodes",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "File Nodes Free",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:3894",
+ "format": "short",
+ "label": "file nodes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:3895",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 88
+ },
+ "hiddenSeries": false,
+ "id": 28,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_filefd_maximum{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 4,
+ "legendFormat": "Max open files",
+ "refId": "A",
+ "step": 8
+ },
+ {
+ "expr": "node_filefd_allocated{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Open files",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "File Descriptor",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "files",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 88
+ },
+ "hiddenSeries": false,
+ "id": 219,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_filesystem_files{instance=\"$node\",job=\"$job\",device!~'rootfs'}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{mountpoint}} - File nodes total",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "File Nodes Size",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "file nodes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "/ ReadOnly": "#890F02"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 98
+ },
+ "hiddenSeries": false,
+ "id": 44,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": true,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_filesystem_readonly{instance=\"$node\",job=\"$job\",device!~'rootfs'}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{mountpoint}} - ReadOnly",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_filesystem_device_error{instance=\"$node\",job=\"$job\",device!~'rootfs',fstype!~'tmpfs'}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{mountpoint}} - Device error",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Filesystem in ReadOnly / Error",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:3670",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": "1",
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:3671",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "repeat": null,
+ "title": "Storage Filesystem",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 30
+ },
+ "id": 272,
+ "panels": [
+ {
+ "aliasColors": {
+ "receive_packets_eth0": "#7EB26D",
+ "receive_packets_lo": "#E24D42",
+ "transmit_packets_eth0": "#7EB26D",
+ "transmit_packets_lo": "#E24D42"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 30
+ },
+ "hiddenSeries": false,
+ "id": 60,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Trans.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_network_receive_packets_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Receive",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_network_transmit_packets_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Transmit",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Network Traffic by Packets",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "pps",
+ "label": "packets out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 30
+ },
+ "hiddenSeries": false,
+ "id": 142,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Trans.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_network_receive_errs_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Receive errors",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_network_transmit_errs_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Transmit errors",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Network Traffic Errors",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "pps",
+ "label": "packets out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 40
+ },
+ "hiddenSeries": false,
+ "id": 143,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Trans.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_network_receive_drop_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Receive drop",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_network_transmit_drop_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Transmit drop",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Network Traffic Drop",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "pps",
+ "label": "packets out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 40
+ },
+ "hiddenSeries": false,
+ "id": 141,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Trans.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_network_receive_compressed_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Receive compressed",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_network_transmit_compressed_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Transmit compressed",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Network Traffic Compressed",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "pps",
+ "label": "packets out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 50
+ },
+ "hiddenSeries": false,
+ "id": 146,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Trans.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_network_receive_multicast_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Receive multicast",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Network Traffic Multicast",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "pps",
+ "label": "packets out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 50
+ },
+ "hiddenSeries": false,
+ "id": 144,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Trans.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_network_receive_fifo_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Receive fifo",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_network_transmit_fifo_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Transmit fifo",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Network Traffic Fifo",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "pps",
+ "label": "packets out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 60
+ },
+ "hiddenSeries": false,
+ "id": 145,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:576",
+ "alias": "/.*Trans.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_network_receive_frame_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Receive frame",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Network Traffic Frame",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:589",
+ "format": "pps",
+ "label": "packets out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:590",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 60
+ },
+ "hiddenSeries": false,
+ "id": 231,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_network_transmit_carrier_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Statistic transmit_carrier",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Network Traffic Carrier",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 70
+ },
+ "hiddenSeries": false,
+ "id": 232,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Trans.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_network_transmit_colls_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Transmit colls",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Network Traffic Colls",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 70
+ },
+ "hiddenSeries": false,
+ "id": 61,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:663",
+ "alias": "NF conntrack limit",
+ "color": "#890F02",
+ "fill": 0
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_nf_conntrack_entries{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "NF conntrack entries",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_nf_conntrack_entries_limit{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "NF conntrack limit",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "NF Conntrack",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:678",
+ "format": "short",
+ "label": "entries",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:679",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 80
+ },
+ "hiddenSeries": false,
+ "id": 230,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_arp_entries{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ device }} - ARP entries",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "ARP Entries",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Entries",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 80
+ },
+ "hiddenSeries": false,
+ "id": 288,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 1,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_network_mtu_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ device }} - Bytes",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "MTU",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": 0,
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 90
+ },
+ "hiddenSeries": false,
+ "id": 280,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 1,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_network_speed_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ device }} - Speed",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Speed",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": 0,
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 90
+ },
+ "hiddenSeries": false,
+ "id": 289,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 1,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_network_transmit_queue_length{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ device }} - Interface transmit queue length",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Queue Length",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": 0,
+ "format": "none",
+ "label": "packets",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 100
+ },
+ "hiddenSeries": false,
+ "id": 290,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:232",
+ "alias": "/.*Dropped.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_softnet_processed_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "CPU {{cpu}} - Processed",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_softnet_dropped_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "CPU {{cpu}} - Dropped",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Softnet Packets",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:207",
+ "format": "short",
+ "label": "packets drop (-) / process (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:208",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 100
+ },
+ "hiddenSeries": false,
+ "id": 310,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_softnet_times_squeezed_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "CPU {{cpu}} - Squeezed",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Softnet Out of Quota",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:207",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:208",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 110
+ },
+ "hiddenSeries": false,
+ "id": 309,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_network_up{operstate=\"up\",instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{interface}} - Operational state UP",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_network_carrier{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "instant": false,
+ "legendFormat": "{{device}} - Physical link state",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Network Operational Status",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "repeat": null,
+ "title": "Network Traffic",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 31
+ },
+ "id": 273,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 13
+ },
+ "hiddenSeries": false,
+ "id": 63,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_sockstat_TCP_alloc{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "TCP_alloc - Allocated sockets",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_sockstat_TCP_inuse{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "TCP_inuse - Tcp sockets currently in use",
+ "refId": "B",
+ "step": 240
+ },
+ {
+ "expr": "node_sockstat_TCP_mem{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": true,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "TCP_mem - Used memory for tcp",
+ "refId": "C",
+ "step": 240
+ },
+ {
+ "expr": "node_sockstat_TCP_orphan{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "TCP_orphan - Orphan sockets",
+ "refId": "D",
+ "step": 240
+ },
+ {
+ "expr": "node_sockstat_TCP_tw{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "TCP_tw - Sockets waiting to close",
+ "refId": "E",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Sockstat TCP",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 13
+ },
+ "hiddenSeries": false,
+ "id": 124,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_sockstat_UDPLITE_inuse{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "UDPLITE_inuse - Udplite sockets currently in use",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_sockstat_UDP_inuse{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "UDP_inuse - Udp sockets currently in use",
+ "refId": "B",
+ "step": 240
+ },
+ {
+ "expr": "node_sockstat_UDP_mem{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "UDP_mem - Used memory for udp",
+ "refId": "C",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Sockstat UDP",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 23
+ },
+ "hiddenSeries": false,
+ "id": 126,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_sockstat_sockets_used{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Sockets_used - Sockets currently in use",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Sockstat Used",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "sockets",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 23
+ },
+ "hiddenSeries": false,
+ "id": 220,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_sockstat_TCP_mem_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "mem_bytes - TCP sockets in that state",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_sockstat_UDP_mem_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "mem_bytes - UDP sockets in that state",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Sockstat Memory Size",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 33
+ },
+ "hiddenSeries": false,
+ "id": 125,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_sockstat_FRAG_inuse{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "FRAG_inuse - Frag sockets currently in use",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_sockstat_FRAG_memory{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "FRAG_memory - Used memory for frag",
+ "refId": "B",
+ "step": 240
+ },
+ {
+ "expr": "node_sockstat_RAW_inuse{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "RAW_inuse - Raw sockets currently in use",
+ "refId": "C",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Sockstat FRAG / RAW",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:1572",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:1573",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "repeat": null,
+ "title": "Network Sockstat",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 32
+ },
+ "id": 274,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 32
+ },
+ "height": "",
+ "hiddenSeries": false,
+ "id": 221,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 12,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:1876",
+ "alias": "/.*Out.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_netstat_IpExt_InOctets{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "InOctets - Received octets",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_netstat_IpExt_OutOctets{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "OutOctets - Sent octets",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Netstat IP In / Out Octets",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:1889",
+ "format": "short",
+ "label": "octets out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:1890",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 32
+ },
+ "height": "",
+ "hiddenSeries": false,
+ "id": 81,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_netstat_Ip_Forwarding{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Forwarding - IP forwarding",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Netstat IP Forwarding",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:1957",
+ "format": "short",
+ "label": "datagrams",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:1958",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 42
+ },
+ "height": "",
+ "hiddenSeries": false,
+ "id": 115,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 12,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Out.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_netstat_Icmp_InMsgs{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "InMsgs - Messages which the entity received. Note that this counter includes all those counted by icmpInErrors",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_netstat_Icmp_OutMsgs{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "OutMsgs - Messages which this entity attempted to send. Note that this counter includes all those counted by icmpOutErrors",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "ICMP In / Out",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "messages out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 42
+ },
+ "height": "",
+ "hiddenSeries": false,
+ "id": 50,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 12,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Out.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_netstat_Icmp_InErrors{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "InErrors - Messages which the entity received but determined as having ICMP-specific errors (bad ICMP checksums, bad length, etc.)",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "ICMP Errors",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "messages out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 52
+ },
+ "height": "",
+ "hiddenSeries": false,
+ "id": 55,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 12,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Out.*/",
+ "transform": "negative-Y"
+ },
+ {
+ "alias": "/.*Snd.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_netstat_Udp_InDatagrams{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "InDatagrams - Datagrams received",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_netstat_Udp_OutDatagrams{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "OutDatagrams - Datagrams sent",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "UDP In / Out",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "datagrams out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 52
+ },
+ "height": "",
+ "hiddenSeries": false,
+ "id": 109,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 12,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_netstat_Udp_InErrors{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "InErrors - UDP Datagrams that could not be delivered to an application",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_netstat_Udp_NoPorts{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "NoPorts - UDP Datagrams received on a port with no listener",
+ "refId": "B",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_netstat_UdpLite_InErrors{instance=\"$node\",job=\"$job\"}[5m])",
+ "interval": "",
+ "legendFormat": "InErrors Lite - UDPLite Datagrams that could not be delivered to an application",
+ "refId": "C"
+ },
+ {
+ "expr": "irate(node_netstat_Udp_RcvbufErrors{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "RcvbufErrors - UDP buffer errors received",
+ "refId": "D",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_netstat_Udp_SndbufErrors{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "SndbufErrors - UDP buffer errors on send",
+ "refId": "E",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "UDP Errors",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:4232",
+ "format": "short",
+ "label": "datagrams",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:4233",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 62
+ },
+ "height": "",
+ "hiddenSeries": false,
+ "id": 299,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 12,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Out.*/",
+ "transform": "negative-Y"
+ },
+ {
+ "alias": "/.*Snd.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_netstat_Tcp_InSegs{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "instant": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "InSegs - Segments received, including those received in error. This count includes segments received on currently established connections",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_netstat_Tcp_OutSegs{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "OutSegs - Segments sent, including those on current connections but excluding those containing only retransmitted octets",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "TCP In / Out",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "segments out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 62
+ },
+ "height": "",
+ "hiddenSeries": false,
+ "id": 104,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 12,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_netstat_TcpExt_ListenOverflows{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "ListenOverflows - Times the listen queue of a socket overflowed",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_netstat_TcpExt_ListenDrops{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "ListenDrops - SYNs to LISTEN sockets ignored",
+ "refId": "B",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_netstat_TcpExt_TCPSynRetrans{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "TCPSynRetrans - SYN-SYN/ACK retransmits to break down retransmissions in SYN, fast/timeout retransmits",
+ "refId": "C",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_netstat_Tcp_RetransSegs{instance=\"$node\",job=\"$job\"}[5m])",
+ "interval": "",
+ "legendFormat": "RetransSegs - Segments retransmitted - that is, the number of TCP segments transmitted containing one or more previously transmitted octets",
+ "refId": "D"
+ },
+ {
+ "expr": "irate(node_netstat_Tcp_InErrs{instance=\"$node\",job=\"$job\"}[5m])",
+ "interval": "",
+ "legendFormat": "InErrs - Segments received in error (e.g., bad TCP checksums)",
+ "refId": "E"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "TCP Errors",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 72
+ },
+ "height": "",
+ "hiddenSeries": false,
+ "id": 85,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 12,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:454",
+ "alias": "/.*MaxConn.*/",
+ "color": "#890F02",
+ "fill": 0
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_netstat_Tcp_CurrEstab{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "CurrEstab - TCP connections for which the current state is either ESTABLISHED or CLOSE-WAIT",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_netstat_Tcp_MaxConn{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "MaxConn - Limit on the total number of TCP connections the entity can support (Dynamic is \"-1\")",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "TCP Connections",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:469",
+ "format": "short",
+ "label": "connections",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:470",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 72
+ },
+ "height": "",
+ "hiddenSeries": false,
+ "id": 91,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 12,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Sent.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_netstat_TcpExt_SyncookiesFailed{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "SyncookiesFailed - Invalid SYN cookies received",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_netstat_TcpExt_SyncookiesRecv{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "SyncookiesRecv - SYN cookies received",
+ "refId": "B",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_netstat_TcpExt_SyncookiesSent{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "SyncookiesSent - SYN cookies sent",
+ "refId": "C",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "TCP SynCookie",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "counter out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 82
+ },
+ "height": "",
+ "hiddenSeries": false,
+ "id": 82,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 12,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_netstat_Tcp_ActiveOpens{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "ActiveOpens - TCP connections that have made a direct transition to the SYN-SENT state from the CLOSED state",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_netstat_Tcp_PassiveOpens{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "PassiveOpens - TCP connections that have made a direct transition to the SYN-RCVD state from the LISTEN state",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "TCP Direct Transition",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "connections",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "repeat": null,
+ "title": "Network Netstat",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 33
+ },
+ "id": 279,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 54
+ },
+ "hiddenSeries": false,
+ "id": 40,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_scrape_collector_duration_seconds{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{collector}} - Scrape duration",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Node Exporter Scrape Time",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": "seconds",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 54
+ },
+ "hiddenSeries": false,
+ "id": 157,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:1969",
+ "alias": "/.*error.*/",
+ "color": "#F2495C",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_scrape_collector_success{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{collector}} - Scrape success",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_textfile_scrape_error{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{collector}} - Scrape textfile error (1 = true)",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Node Exporter Scrape",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:1484",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:1485",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "repeat": null,
+ "title": "Node Exporter",
+ "type": "row"
+ }
+ ],
+ "refresh": "1m",
+ "schemaVersion": 22,
+ "style": "dark",
+ "tags": [
+ "linux"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "selected": false,
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": "datasource",
+ "multi": false,
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "definition": "",
+ "hide": 0,
+ "includeAll": false,
+ "index": -1,
+ "label": "Job",
+ "multi": false,
+ "name": "job",
+ "options": [],
+ "query": "label_values(node_uname_info, job)",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "definition": "label_values(node_uname_info{job=\"$job\"}, instance)",
+ "hide": 0,
+ "includeAll": false,
+ "index": -1,
+ "label": "Host:",
+ "multi": false,
+ "name": "node",
+ "options": [],
+ "query": "label_values(node_uname_info{job=\"$job\"}, instance)",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+ "selected": false,
+ "text": "[a-z]+|nvme[0-9]+n[0-9]+",
+ "value": "[a-z]+|nvme[0-9]+n[0-9]+"
+ },
+ "hide": 2,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "diskdevices",
+ "options": [
+ {
+ "selected": true,
+ "text": "[a-z]+|nvme[0-9]+n[0-9]+",
+ "value": "[a-z]+|nvme[0-9]+n[0-9]+"
+ }
+ ],
+ "query": "[a-z]+|nvme[0-9]+n[0-9]+",
+ "skipUrlSync": false,
+ "type": "custom"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-4h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "Node Exporter",
+ "version": 1
+} \ No newline at end of file
diff --git a/terraform-ci-infra/1n_nmd/grafana/conf/nomad.json b/terraform-ci-infra/1n_nmd/grafana/conf/nomad.json
new file mode 100644
index 0000000000..40ffeddf7b
--- /dev/null
+++ b/terraform-ci-infra/1n_nmd/grafana/conf/nomad.json
@@ -0,0 +1,869 @@
+{
+ "__inputs": [
+ {
+ "name": "DS_PROMETHEUS",
+ "label": "Prometheus",
+ "description": "",
+ "type": "datasource",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus"
+ }
+ ],
+ "__requires": [
+ {
+ "type": "grafana",
+ "id": "grafana",
+ "name": "Grafana",
+ "version": "5.3.2"
+ },
+ {
+ "type": "panel",
+ "id": "graph",
+ "name": "Graph",
+ "version": "5.0.0"
+ },
+ {
+ "type": "datasource",
+ "id": "prometheus",
+ "name": "Prometheus",
+ "version": "5.0.0"
+ },
+ {
+ "type": "panel",
+ "id": "singlestat",
+ "name": "Singlestat",
+ "version": "5.0.0"
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "description": "Nomad Jobs metrics",
+ "editable": true,
+ "gnetId": 12787,
+ "graphTooltip": 0,
+ "id": null,
+ "iteration": 1596708119930,
+ "links": [],
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "format": "dtdurations",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 0,
+ "y": 0
+ },
+ "id": 16,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "alias": "",
+ "expr": "max(nomad_client_uptime{instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "rawSql": "SELECT\n UNIX_TIMESTAMP(<time_column>) as time_sec,\n <value column> as value,\n <series name column> as metric\nFROM <table name>\nWHERE $__timeFilter(time_column)\nORDER BY <time_column> ASC\n",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Uptime",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorPrefix": false,
+ "colorValue": false,
+ "colors": [
+ "#7eb26d",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 4,
+ "y": 0
+ },
+ "id": 17,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "alias": "",
+ "expr": "count(sum(nomad_client_allocs_memory_cache) by (exported_job))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "rawSql": "SELECT\n UNIX_TIMESTAMP(<time_column>) as time_sec,\n <value column> as value,\n <series name column> as metric\nFROM <table name>\nWHERE $__timeFilter(time_column)\nORDER BY <time_column> ASC\n",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Jobs",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorPrefix": false,
+ "colorValue": false,
+ "colors": [
+ "#7eb26d",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 8,
+ "y": 0
+ },
+ "id": 12,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "alias": "",
+ "expr": "sum(nomad_client_allocations_running{datacenter=\"$datacenter\",instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "rawSql": "SELECT\n UNIX_TIMESTAMP(<time_column>) as time_sec,\n <value column> as value,\n <series name column> as metric\nFROM <table name>\nWHERE $__timeFilter(time_column)\nORDER BY <time_column> ASC\n",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Allocs",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 4,
+ "w": 12,
+ "x": 12,
+ "y": 0
+ },
+ "id": 14,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "alias": "",
+ "expr": "sum(nomad_client_allocations_blocked{datacenter=\"$datacenter\",instance=~\"$instance\"})",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 1,
+ "legendFormat": "Blocked",
+ "rawSql": "SELECT\n UNIX_TIMESTAMP(<time_column>) as time_sec,\n <value column> as value,\n <series name column> as metric\nFROM <table name>\nWHERE $__timeFilter(time_column)\nORDER BY <time_column> ASC\n",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(nomad_client_allocations_pending{datacenter=\"$datacenter\",instance=~\"$instance\"})",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 1,
+ "legendFormat": "Pending",
+ "refId": "B"
+ },
+ {
+ "expr": "sum(nomad_client_allocations_restart{datacenter=\"$datacenter\",instance=~\"$instance\"})",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 1,
+ "legendFormat": "Restart",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Blocked/Pending/Restart",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 6,
+ "w": 12,
+ "x": 0,
+ "y": 13
+ },
+ "id": 2,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": "host",
+ "repeatDirection": "v",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(nomad_client_allocs_cpu_total_percent[5m:10s]) > 1",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "{{task}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU Usage Percent",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": 3,
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 6,
+ "w": 12,
+ "x": 12,
+ "y": 13
+ },
+ "id": 3,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": "host",
+ "repeatDirection": "v",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "avg(nomad_client_allocs_cpu_total_ticks{instance=~\"$instance\"}) by(exported_job, task)",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "{{task}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU Total Ticks",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": 3,
+ "format": "timeticks",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 6,
+ "w": 12,
+ "x": 0,
+ "y": 19
+ },
+ "id": 6,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": "host",
+ "repeatDirection": "v",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "avg(nomad_client_allocs_memory_rss{instance=~\"$instance\"}) by(exported_job, task)",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "{{task}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "RSS",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": 3,
+ "format": "decbytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 6,
+ "w": 12,
+ "x": 12,
+ "y": 19
+ },
+ "id": 7,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": "host",
+ "repeatDirection": "v",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "avg(nomad_client_allocs_memory_cache{instance=~\"$instance\"}) by(exported_job, task)",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "{{task}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory Cache",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": 3,
+ "format": "decbytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "refresh": false,
+ "schemaVersion": 16,
+ "style": "dark",
+ "tags": [],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "selected": false,
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": "datasource",
+ "multi": false,
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "hide": 0,
+ "includeAll": false,
+ "label": "DC",
+ "multi": false,
+ "name": "datacenter",
+ "options": [],
+ "query": "label_values(nomad_client_uptime, datacenter)",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "hide": 0,
+ "includeAll": true,
+ "label": "Host",
+ "multi": true,
+ "name": "instance",
+ "options": [],
+ "query": "label_values(nomad_client_uptime{datacenter=~\"$datacenter\"}, instance)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-4h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "Nomad",
+ "version": 1
+} \ No newline at end of file
diff --git a/terraform-ci-infra/1n_nmd/grafana/conf/nomad/grafana.hcl b/terraform-ci-infra/1n_nmd/grafana/conf/nomad/grafana.hcl
new file mode 100644
index 0000000000..7325c6aef4
--- /dev/null
+++ b/terraform-ci-infra/1n_nmd/grafana/conf/nomad/grafana.hcl
@@ -0,0 +1,331 @@
+job "${job_name}" {
+ # The "region" parameter specifies the region in which to execute the job.
+ # If omitted, this inherits the default region name of "global".
+ # region = "global"
+ #
+ # The "datacenters" parameter specifies the list of datacenters which should
+ # be considered when placing this task. This must be provided. NOTE(review): the value is rendered here as one quoted string rather than a list; confirm the "datacenters" template variable yields a form Nomad accepts.
+ datacenters = "${datacenters}"
+
+ # The "type" parameter controls the type of job, which impacts the scheduler's
+ # decision on placement. This configuration is optional and defaults to
+ # "service". For a full list of job types and their differences, please see
+ # the online documentation.
+ #
+ # For more information, please see the online documentation at:
+ #
+ # https://www.nomadproject.io/docs/jobspec/schedulers
+ #
+ type = "service"
+
+ update {
+ # The "max_parallel" parameter specifies the maximum number of updates to
+ # perform in parallel. In this case, this specifies to update a single task
+ # at a time.
+ max_parallel = 1
+
+ health_check = "checks"
+
+ # The "min_healthy_time" parameter specifies the minimum time the allocation
+ # must be in the healthy state before it is marked as healthy and unblocks
+ # further allocations from being updated.
+ min_healthy_time = "10s"
+
+ # The "healthy_deadline" parameter specifies the deadline in which the
+ # allocation must be marked as healthy after which the allocation is
+ # automatically transitioned to unhealthy. Transitioning to unhealthy will
+ # fail the deployment and potentially roll back the job if "auto_revert" is
+ # set to true.
+ healthy_deadline = "3m"
+
+ # The "progress_deadline" parameter specifies the deadline in which an
+ # allocation must be marked as healthy. The deadline begins when the first
+ # allocation for the deployment is created and is reset whenever an allocation
+ # as part of the deployment transitions to a healthy state. If no allocation
+ # transitions to the healthy state before the progress deadline, the
+ # deployment is marked as failed.
+ progress_deadline = "10m"
+
+%{ if use_canary }
+ # The "canary" parameter specifies that changes to the job that would result
+ # in destructive updates should create the specified number of canaries
+ # without stopping any previous allocations. Once the operator determines the
+ # canaries are healthy, they can be promoted which unblocks a rolling update
+ # of the remaining allocations at a rate of "max_parallel".
+ # Note: this setting and the "auto_promote" / "auto_revert" settings below are only rendered when the "use_canary" template variable is true.
+ # Further, setting "canary" equal to the count of the task group allows
+ # blue/green deployments. When the job is updated, a full set of the new
+ # version is deployed and upon promotion the old version is stopped.
+ canary = 1
+
+ # Specifies if the job should auto-promote to the canary version when all
+ # canaries become healthy during a deployment. Defaults to false which means
+ # canaries must be manually promoted with the "nomad deployment promote"
+ # command.
+ auto_promote = true
+
+ # The "auto_revert" parameter specifies if the job should auto-revert to the
+ # last stable job on deployment failure. A job is marked as stable if all the
+ # allocations as part of its deployment were marked healthy.
+ auto_revert = true
+%{ endif }
+ }
+
+ # The "group" stanza defines a series of tasks that should be co-located on
+ # the same Nomad client. Any task within a group will be placed on the same
+ # client.
+ #
+ # For more information and examples on the "group" stanza, please see
+ # the online documentation at:
+ #
+ # https://www.nomadproject.io/docs/job-specification/group
+ #
+ group "prod-group1-${service_name}" {
+ # The "count" parameter specifies the number of the task groups that should
+ # be running under this group. This value must be non-negative and defaults
+ # to 1.
+ count = ${group_count}
+
+
+ # The constraint allows restricting the set of eligible nodes. Constraints
+ # may filter on attributes or client metadata.
+ # Here, nodes whose "attr.cpu.arch" attribute equals "arm64" are excluded.
+ # For more information and examples on the "constraint" stanza, please see
+ # the online documentation at:
+ #
+ # https://www.nomadproject.io/docs/job-specification/constraint
+ #
+ constraint {
+ attribute = "$${attr.cpu.arch}"
+ operator = "!="
+ value = "arm64"
+ }
+
+ # The "task" stanza creates an individual unit of work, such as a Docker
+ # container, web application, or batch processing.
+ #
+ # For more information and examples on the "task" stanza, please see
+ # the online documentation at:
+ #
+ # https://www.nomadproject.io/docs/job-specification/task
+ #
+ task "prod-task1-${service_name}" {
+ # The "driver" parameter specifies the task driver that should be used to
+ # run the task.
+ driver = "docker"
+
+ # The "config" stanza specifies the driver configuration, which is passed
+ # directly to the driver to start the task. The details of configurations
+ # are specific to each driver, so please see specific driver
+ # documentation for more information. Here "dns_servers" is set from the "attr.unique.network.ip-address" node attribute, and "volumes" maps the files under secrets/ to paths under /etc/grafana.
+ config {
+ image = "${image}"
+ dns_servers = [ "$${attr.unique.network.ip-address}" ]
+ volumes = [
+ "secrets/prometheus.yml:/etc/grafana/provisioning/datasources/prometheus.yml",
+ "secrets/dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml",
+ "secrets/grafana.ini:/etc/grafana/grafana.ini",
+ "secrets/node_exporter.json:/etc/grafana/provisioning/dashboards/node_exporter.json",
+ "secrets/docker_cadvisor.json:/etc/grafana/provisioning/dashboards/docker_cadvisor.json",
+ "secrets/nomad.json:/etc/grafana/provisioning/dashboards/nomad.json",
+ "secrets/consul.json:/etc/grafana/provisioning/dashboards/consul.json",
+ "secrets/prometheus.json:/etc/grafana/provisioning/dashboards/prometheus.json",
+ "secrets/blackbox_exporter_http.json:/etc/grafana/provisioning/dashboards/blackbox_exporter_http.json",
+ "secrets/blackbox_exporter_icmp.json:/etc/grafana/provisioning/dashboards/blackbox_exporter_icmp.json"
+ ]
+ }
+
+ artifact {
+ # Prometheus Node Exporter dashboard (JSON), placed under secrets/
+ source = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/node_exporter.json"
+ destination = "secrets/"
+ }
+
+ artifact {
+ # Docker cAdvisor dashboard (JSON), placed under secrets/
+ source = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/docker_cadvisor.json"
+ destination = "secrets/"
+ }
+
+ artifact {
+ # Nomad dashboard (JSON), placed under secrets/
+ source = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/nomad.json"
+ destination = "secrets/"
+ }
+
+ artifact {
+ # Consul dashboard (JSON), placed under secrets/
+ source = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/consul.json"
+ destination = "secrets/"
+ }
+
+ artifact {
+ # Prometheus dashboard (JSON), placed under secrets/
+ source = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/prometheus.json"
+ destination = "secrets/"
+ }
+
+ artifact {
+ # Prometheus Blackbox Exporter HTTP dashboard (JSON), placed under secrets/
+ source = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/blackbox_exporter_http.json"
+ destination = "secrets/"
+ }
+
+ artifact {
+ # Prometheus Blackbox Exporter ICMP dashboard (JSON), placed under secrets/
+ source = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/blackbox_exporter_icmp.json"
+ destination = "secrets/"
+ }
+
+ # The "template" stanza instructs Nomad to manage a template, such as
+ # a configuration file or script. This template can optionally pull data
+ # from Consul or Vault to populate runtime configuration data.
+ # Three templates follow: secrets/prometheus.yml (datasource definition), secrets/dashboards.yml (dashboard provider definition) and secrets/grafana.ini (Grafana settings).
+ # For more information and examples on the "template" stanza, please see
+ # the online documentation at:
+ # NOTE(review): each template sets "change_signal" although "change_mode" is "noop"; the signal is presumably unused in that mode - confirm against the documentation below.
+ # https://www.nomadproject.io/docs/job-specification/template
+ # NOTE(review): the secrets/grafana.ini template hard-codes admin_user, admin_password and secret_key in this file; consider passing them in as template variables or reading them from a secret store.
+ template {
+ change_mode = "noop"
+ change_signal = "SIGINT"
+ destination = "secrets/prometheus.yml"
+ data = <<EOH
+apiVersion: 1
+datasources:
+- name: Prometheus
+ type: prometheus
+ access: direct
+ orgId: 1
+ url: http://prometheus.service.consul:9090
+ basicAuth: false
+ isDefault: true
+ version: 1
+ editable: false
+EOH
+ }
+
+ template {
+ change_mode = "noop"
+ change_signal = "SIGINT"
+ destination = "secrets/dashboards.yml"
+ data = <<EOH
+apiVersion: 1
+providers:
+- name: dashboards
+ type: file
+ disableDeletion: false
+ updateIntervalSeconds: 10
+ allowUiUpdates: false
+ options:
+ path: /etc/grafana/provisioning/dashboards
+ foldersFromFilesStructure: true
+EOH
+ }
+
+ template {
+ change_mode = "noop"
+ change_signal = "SIGINT"
+ destination = "secrets/grafana.ini"
+ data = <<EOH
+app_mode = production
+
+[metrics]
+enabled = true
+
+[server]
+protocol = http
+http_port = ${port}
+root_url = http://${service_name}.service.consul:${port}
+enable_gzip = true
+;cert_file =
+;cert_key =
+
+[security]
+admin_user = grafanauser
+admin_password = Grafana1234
+secret_key = SW2YcwTIb9zpOOhoPsMm
+
+[users]
+allow_sign_up = false
+allow_org_create = false
+auto_assign_org = true
+auto_assign_org_role = Viewer
+default_theme = dark
+
+[auth.basic]
+enabled = true
+
+[auth]
+disable_login_form = false
+disable_signout_menu = false
+
+[auth.anonymous]
+enabled = false
+
+[log]
+mode = console
+level = info
+
+[log.console]
+level = info
+format = console
+EOH
+ }
+
+ # The service stanza instructs Nomad to register a service with Consul.
+ # The service name, port label and tag prefix all come from the "service_name" template variable; the HTTP check requests /api/health every 10s with a 2s timeout.
+ # For more information and examples on the "service" stanza, please see
+ # the online documentation at:
+ #
+ # https://www.nomadproject.io/docs/job-specification/service
+ #
+ service {
+ name = "${service_name}"
+ port = "${service_name}"
+ tags = [ "${service_name}$${NOMAD_ALLOC_INDEX}" ]
+ check {
+ name = "Grafana Check Live"
+ type = "http"
+ protocol = "http"
+ tls_skip_verify = true
+ path = "/api/health"
+ interval = "10s"
+ timeout = "2s"
+ }
+ }
+
+ # The "resources" stanza describes the requirements a task needs to
+ # execute. Resource requirements include memory, network, cpu, and more.
+ # This ensures the task will execute on a machine that contains enough
+ # resource capacity.
+ #
+ # For more information and examples on the "resources" stanza, please see
+ # the online documentation at:
+ #
+ # https://www.nomadproject.io/docs/job-specification/resources
+ #
+ resources {
+ cpu = ${cpu}
+ memory = ${mem}
+ # The network stanza specifies the networking requirements for the task
+ # group, including the network mode and port allocations. When scheduling
+ # jobs in Nomad they are provisioned across your fleet of machines along
+ # with other jobs and services. Because you don't know in advance what host
+ # your job will be provisioned on, Nomad will provide your tasks with
+ # network configuration when they start up.
+ # Here a single static port, labelled with the service name, is requested; its number comes from the "port" template variable.
+ # For more information and examples on the "network" stanza, please see
+ # the online documentation at:
+ #
+ # https://www.nomadproject.io/docs/job-specification/network
+ #
+ network {
+ port "${service_name}" {
+ static = ${port}
+ }
+ }
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/terraform-ci-infra/1n_nmd/grafana/conf/prometheus.json b/terraform-ci-infra/1n_nmd/grafana/conf/prometheus.json
new file mode 100644
index 0000000000..0f07574b07
--- /dev/null
+++ b/terraform-ci-infra/1n_nmd/grafana/conf/prometheus.json
@@ -0,0 +1,3055 @@
+{
+ "__inputs": [
+ {
+ "name": "DS_PROMETHEUS",
+ "label": "prometheus",
+ "description": "A prometheus server with prometheus server metrics",
+ "type": "datasource",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus"
+ }
+ ],
+ "__requires": [
+ {
+ "type": "grafana",
+ "id": "grafana",
+ "name": "Grafana",
+ "version": "4.5.0-beta1"
+ },
+ {
+ "type": "panel",
+ "id": "graph",
+ "name": "Graph",
+ "version": ""
+ },
+ {
+ "type": "datasource",
+ "id": "prometheus",
+ "name": "Prometheus",
+ "version": "1.0.0"
+ },
+ {
+ "type": "panel",
+ "id": "singlestat",
+ "name": "Singlestat",
+ "version": ""
+ },
+ {
+ "type": "panel",
+ "id": "table",
+ "name": "Table",
+ "version": ""
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "enable": true,
+ "expr": "sum(changes(prometheus_config_last_reload_success_timestamp_seconds{instance=~\"$instance\"}[10m])) by (instance)",
+ "hide": false,
+ "iconColor": "rgb(0, 96, 19)",
+ "limit": 100,
+ "name": "reloads",
+ "showIn": 0,
+ "step": "5m",
+ "type": "alert"
+ },
+ {
+ "datasource": "${DS_PROMETHEUS}",
+ "enable": true,
+ "expr": "count(sum(up{instance=\"$instance\"}) by (instance) < 1)",
+ "hide": false,
+ "iconColor": "rgba(255, 96, 96, 1)",
+ "limit": 100,
+ "name": "down",
+ "showIn": 0,
+ "step": "5m",
+ "type": "alert"
+ }
+ ]
+ },
+ "description": "Get started faster with Grafana Cloud then easily build these dashboards. https://grafana.com/products/cloud/\nOverview of metrics from Prometheus 2.0. \nUseful for using prometheus to monitor your prometheus.\nRevisions welcome!",
+ "editable": true,
+ "gnetId": 3662,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [],
+ "refresh": "30s",
+ "rows": [
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 3,
+ "description": "Percentage of uptime during the most recent $interval period. Change the period with the 'interval' dropdown above.",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": false
+ },
+ "id": 2,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "%",
+ "postfixFontSize": "100%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 3,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": true,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "avg(avg_over_time(up{instance=~\"$instance\",job=~\"$job\"}[$interval]) * 100)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 40
+ }
+ ],
+ "thresholds": "90, 99",
+ "title": "Uptime [$interval]",
+ "type": "singlestat",
+ "valueFontSize": "100%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "columns": [],
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "Servers which are DOWN RIGHT NOW! \nFIX THEM!!",
+ "fontSize": "100%",
+ "hideTimeOverride": true,
+ "id": 25,
+ "links": [],
+ "pageSize": null,
+ "scroll": true,
+ "showHeader": true,
+ "sort": {
+ "col": 0,
+ "desc": true
+ },
+ "span": 3,
+ "styles": [
+ {
+ "alias": "Time",
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "pattern": "Time",
+ "type": "hidden"
+ },
+ {
+ "alias": "",
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "/__name__|job|Value/",
+ "thresholds": [],
+ "type": "hidden",
+ "unit": "short"
+ },
+ {
+ "alias": " ",
+ "colorMode": "cell",
+ "colors": [
+ "rgba(255, 0, 0, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(255, 0, 0, 0.97)"
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "link": false,
+ "pattern": "instance",
+ "thresholds": [
+ "",
+ "",
+ ""
+ ],
+ "type": "string",
+ "unit": "short"
+ }
+ ],
+ "targets": [
+ {
+ "expr": "up{instance=~\"$instance\",job=~\"$job\"} < 1",
+ "format": "table",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "timeFrom": "1s",
+ "title": "Currently Down",
+ "transform": "table",
+ "type": "table"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "Total number of time series in prometheus",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 12,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 3,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": true,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(prometheus_tsdb_head_series{job=~\"$job\",instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "B",
+ "step": 40
+ }
+ ],
+ "thresholds": "1000000,2000000",
+ "title": "Total Series",
+ "type": "singlestat",
+ "valueFontSize": "100%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 14,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 3,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": true,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(prometheus_tsdb_head_chunks{job=~\"$job\",instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "B",
+ "step": 40
+ }
+ ],
+ "thresholds": "",
+ "title": "Memory Chunks",
+ "type": "singlestat",
+ "valueFontSize": "100%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "at a glance",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 236,
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "The total number of rule group evaluations missed due to slow rule group evaluation.",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 16,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(sum_over_time(prometheus_evaluator_iterations_missed_total{job=~\"$job\",instance=~\"$instance\"}[$interval]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 40
+ }
+ ],
+ "thresholds": "1,10",
+ "title": "Missed Iterations [$interval]",
+ "type": "singlestat",
+ "valueFontSize": "100%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "The total number of rule group evaluations skipped due to throttled metric storage.",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 18,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(sum_over_time(prometheus_evaluator_iterations_skipped_total{job=~\"$job\",instance=~\"$instance\"}[$interval]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 40
+ }
+ ],
+ "thresholds": "1,10",
+ "title": "Skipped Iterations [$interval]",
+ "type": "singlestat",
+ "valueFontSize": "100%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "Total number of scrapes that hit the sample limit and were rejected.",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 19,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(sum_over_time(prometheus_target_scrapes_exceeded_sample_limit_total{job=~\"$job\",instance=~\"$instance\"}[$interval]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 40
+ }
+ ],
+ "thresholds": "1,10",
+ "title": "Tardy Scrapes [$interval]",
+ "type": "singlestat",
+ "valueFontSize": "100%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "Number of times the database failed to reload block data from disk.",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 13,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(sum_over_time(prometheus_tsdb_reloads_failures_total{job=~\"$job\",instance=~\"$instance\"}[$interval]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 40
+ }
+ ],
+ "thresholds": "1,10",
+ "title": "Reload Failures [$interval]",
+ "type": "singlestat",
+ "valueFontSize": "100%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "Sum of all skipped scrapes",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 20,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 4,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(sum_over_time(prometheus_target_scrapes_exceeded_sample_limit_total{job=~\"$job\",instance=~\"$instance\"}[$interval])) + \nsum(sum_over_time(prometheus_target_scrapes_sample_duplicate_timestamp_total{job=~\"$job\",instance=~\"$instance\"}[$interval])) + \nsum(sum_over_time(prometheus_target_scrapes_sample_out_of_bounds_total{job=~\"$job\",instance=~\"$instance\"}[$interval])) + \nsum(sum_over_time(prometheus_target_scrapes_sample_out_of_order_total{job=~\"$job\",instance=~\"$instance\"}[$interval])) ",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 40
+ }
+ ],
+ "thresholds": "1,10",
+ "title": "Skipped Scrapes [$interval]",
+ "type": "singlestat",
+ "valueFontSize": "100%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "quick numbers",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "All non-zero failures and errors",
+ "fill": 1,
+ "id": 33,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(increase(net_conntrack_dialer_conn_failed_total{instance=~\"$instance\"}[5m])) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Failed Connections",
+ "refId": "A",
+ "step": 2
+ },
+ {
+ "expr": "sum(increase(prometheus_evaluator_iterations_missed_total{instance=~\"$instance\"}[5m])) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Missed Iterations",
+ "refId": "B",
+ "step": 2
+ },
+ {
+ "expr": "sum(increase(prometheus_evaluator_iterations_skipped_total{instance=~\"$instance\"}[5m])) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Skipped Iterations",
+ "refId": "C",
+ "step": 2
+ },
+ {
+ "expr": "sum(increase(prometheus_rule_evaluation_failures_total{instance=~\"$instance\"}[5m])) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Evaluation",
+ "refId": "D",
+ "step": 2
+ },
+ {
+ "expr": "sum(increase(prometheus_sd_azure_refresh_failures_total{instance=~\"$instance\"}[5m])) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Azure Refresh",
+ "refId": "E",
+ "step": 2
+ },
+ {
+ "expr": "sum(increase(prometheus_sd_consul_rpc_failures_total{instance=~\"$instance\"}[5m])) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Consul RPC",
+ "refId": "F",
+ "step": 2
+ },
+ {
+ "expr": "sum(increase(prometheus_sd_dns_lookup_failures_total{instance=~\"$instance\"}[5m])) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "DNS Lookup",
+ "refId": "G",
+ "step": 2
+ },
+ {
+ "expr": "sum(increase(prometheus_sd_ec2_refresh_failures_total{instance=~\"$instance\"}[5m])) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "EC2 Refresh",
+ "refId": "H",
+ "step": 2
+ },
+ {
+ "expr": "sum(increase(prometheus_sd_gce_refresh_failures_total{instance=~\"$instance\"}[5m])) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "GCE Refresh",
+ "refId": "I",
+ "step": 2
+ },
+ {
+ "expr": "sum(increase(prometheus_sd_marathon_refresh_failures_total{instance=~\"$instance\"}[5m])) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Marathon Refresh",
+ "refId": "J",
+ "step": 2
+ },
+ {
+ "expr": "sum(increase(prometheus_sd_openstack_refresh_failures_total{instance=~\"$instance\"}[5m])) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Openstack Refresh",
+ "refId": "K",
+ "step": 2
+ },
+ {
+ "expr": "sum(increase(prometheus_sd_triton_refresh_failures_total{instance=~\"$instance\"}[5m])) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Triton Refresh",
+ "refId": "L",
+ "step": 2
+ },
+ {
+ "expr": "sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance=~\"$instance\"}[5m])) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Sample Limit",
+ "refId": "M",
+ "step": 2
+ },
+ {
+ "expr": "sum(increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{instance=~\"$instance\"}[5m])) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Duplicate Timestamp",
+ "refId": "N",
+ "step": 2
+ },
+ {
+ "expr": "sum(increase(prometheus_target_scrapes_sample_out_of_bounds_total{instance=~\"$instance\"}[5m])) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Timestamp Out of Bounds",
+ "refId": "O",
+ "step": 2
+ },
+ {
+ "expr": "sum(increase(prometheus_target_scrapes_sample_out_of_order_total{instance=~\"$instance\"}[5m])) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Sample Out of Order",
+ "refId": "P",
+ "step": 2
+ },
+ {
+ "expr": "sum(increase(prometheus_treecache_zookeeper_failures_total{instance=~\"$instance\"}[5m])) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Zookeeper",
+ "refId": "Q",
+ "step": 2
+ },
+ {
+ "expr": "sum(increase(prometheus_tsdb_compactions_failed_total{instance=~\"$instance\"}[5m])) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "TSDB Compactions",
+ "refId": "R",
+ "step": 2
+ },
+ {
+ "expr": "sum(increase(prometheus_tsdb_head_series_not_found{instance=~\"$instance\"}[5m])) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Series Not Found",
+ "refId": "S",
+ "step": 2
+ },
+ {
+ "expr": "sum(increase(prometheus_tsdb_reloads_failures_total{instance=~\"$instance\"}[5m])) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Reload",
+ "refId": "T",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Failures and Errors",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Errors",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "errors",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "id": 1,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "up{instance=~\"$instance\",job=~\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "{{instance}}",
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Upness (stacked)",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": 0,
+ "format": "none",
+ "label": "Up",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "id": 5,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "prometheus_tsdb_head_chunks{job=~\"$job\",instance=~\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Storage Memory Chunks",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Chunks",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "up",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "id": 3,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "prometheus_tsdb_head_series{job=~\"$job\",instance=~\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Series Count",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Series",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "id": 32,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "removed",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum( increase(prometheus_tsdb_head_series_created_total{instance=~\"$instance\"}[5m]) )",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "created",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "sum( increase(prometheus_tsdb_head_series_removed_total{instance=~\"$instance\"}[5m]) )",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "removed",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Series Created / Removed",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Series Count",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "series",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {
+ "10.58.3.10:80": "#BA43A9"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "Rate of total number of appended samples",
+ "fill": 1,
+ "id": 4,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(prometheus_tsdb_head_samples_appended_total{job=~\"$job\",instance=~\"$instance\"}[1m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Appended Samples per Second",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Samples / Second",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "appended samples",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "Total number of syncs that were executed on a scrape pool.",
+ "fill": 1,
+ "id": 6,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(prometheus_target_scrape_pool_sync_total{job=~\"$job\",instance=~\"$instance\"}) by (scrape_job)",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{scrape_job}}",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Scrape Sync Total",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Syncs",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "Actual interval to sync the scrape pool.",
+ "fill": 1,
+ "id": 21,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(prometheus_target_sync_length_seconds_sum{job=~\"$job\",instance=~\"$instance\"}[2m])) by (scrape_job) * 1000",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{scrape_job}}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Target Sync",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Milliseconds",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "sync",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "id": 29,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "scrape_duration_seconds{instance=~\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Scrape Duration",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Seconds",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "Total number of rejected scrapes",
+ "fill": 1,
+ "id": 30,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(prometheus_target_scrapes_exceeded_sample_limit_total{job=~\"$job\",instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "exceeded sample limit",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "sum(prometheus_target_scrapes_sample_duplicate_timestamp_total{job=~\"$job\",instance=~\"$instance\"})",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "duplicate timestamp",
+ "refId": "B",
+ "step": 4
+ },
+ {
+ "expr": "sum(prometheus_target_scrapes_sample_out_of_bounds_total{job=~\"$job\",instance=~\"$instance\"})",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "out of bounds",
+ "refId": "C",
+ "step": 4
+ },
+ {
+ "expr": "sum(prometheus_target_scrapes_sample_out_of_order_total{job=~\"$job\",instance=~\"$instance\"})",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "out of order",
+ "refId": "D",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Rejected Scrapes",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": 0,
+ "format": "short",
+ "label": "Scrapes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "scrapes",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "The duration of rule group evaluations",
+ "fill": 1,
+ "id": 10,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "1000 * rate(prometheus_evaluator_duration_seconds_sum{job=~\"$job\", instance=~\"$instance\"}[5m]) / rate(prometheus_evaluator_duration_seconds_count{job=~\"$job\", instance=~\"$instance\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "E",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Average Rule Evaluation Duration",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Milliseconds",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "id": 11,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(http_request_duration_microseconds_count{job=~\"$job\",instance=~\"$instance\"}[1m])) by (handler) > 0",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{handler}}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "HTTP Request Rate",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Requests / Second",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "id": 15,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(prometheus_engine_query_duration_seconds_sum{job=~\"$job\",instance=~\"$instance\"}) by (slice)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{slice}}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Prometheus Engine Query Duration Seconds",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Seconds",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "Rule-group evaluations \n - total\n - missed due to slow rule group evaluation\n - skipped due to throttled metric storage",
+ "fill": 1,
+ "id": 31,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(prometheus_evaluator_iterations_total{job=~\"$job\", instance=~\"$instance\"}[5m]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Total",
+ "refId": "B",
+ "step": 4
+ },
+ {
+ "expr": "sum(rate(prometheus_evaluator_iterations_missed_total{job=~\"$job\", instance=~\"$instance\"}[5m]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Missed",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "sum(rate(prometheus_evaluator_iterations_skipped_total{job=~\"$job\", instance=~\"$instance\"}[5m]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Skipped",
+ "refId": "C",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Rule Evaluator Iterations",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "iterations",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "durations",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "id": 22,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(prometheus_notifications_sent_total{job=~\"$job\",instance=~\"$instance\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Notifications Sent",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Notifications",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "notifications",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "id": 23,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "(time() - prometheus_config_last_reload_success_timestamp_seconds{job=~\"$job\",instance=~\"$instance\"}) / 60",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Minutes Since Successful Config Reload",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Minutes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "id": 24,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "prometheus_config_last_reload_successful{job=~\"$job\",instance=~\"$instance\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Successful Config Reload",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": 0,
+ "format": "short",
+ "label": "Success",
+ "logBase": 1,
+ "max": "1",
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "config",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "GC invocation durations",
+ "fill": 1,
+ "id": 28,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(go_gc_duration_seconds_sum{instance=~\"$instance\",job=~\"$job\"}[2m])) by (instance)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{instance}}",
+ "refId": "A",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "GC Rate / 2m",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "garbage collection",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": true,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
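+ "description": "FIXME: this stacked view is probably wrong because the series overlap (for example, sys already includes heap sys, which in turn covers heap in use and heap idle), so the stacked total overstates real memory usage. Please help.",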
+ "fill": 1,
+ "id": 26,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "allocated",
+ "stack": false
+ }
+ ],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(go_memstats_alloc_bytes_total{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "alloc_bytes_total",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "sum(go_memstats_alloc_bytes{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "allocated",
+ "refId": "B",
+ "step": 10
+ },
+ {
+ "expr": "sum(go_memstats_buck_hash_sys_bytes{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "profiling bucket hash table",
+ "refId": "C",
+ "step": 10
+ },
+ {
+ "expr": "sum(go_memstats_gc_sys_bytes{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "GC metadata",
+ "refId": "D",
+ "step": 10
+ },
+ {
+ "expr": "sum(go_memstats_heap_alloc_bytes{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "heap allocated",
+ "refId": "E",
+ "step": 10
+ },
+ {
+ "expr": "sum(go_memstats_heap_idle_bytes{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "heap idle",
+ "refId": "F",
+ "step": 10
+ },
+ {
+ "expr": "sum(go_memstats_heap_inuse_bytes{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "heap in use",
+ "refId": "G",
+ "step": 10
+ },
+ {
+ "expr": "sum(go_memstats_heap_released_bytes{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "heap released",
+ "refId": "H",
+ "step": 10
+ },
+ {
+ "expr": "sum(go_memstats_heap_sys_bytes{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "heap system",
+ "refId": "I",
+ "step": 10
+ },
+ {
+ "expr": "sum(go_memstats_mcache_inuse_bytes{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "mcache in use",
+ "refId": "J",
+ "step": 10
+ },
+ {
+ "expr": "sum(go_memstats_mcache_sys_bytes{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "mcache sys",
+ "refId": "K",
+ "step": 10
+ },
+ {
+ "expr": "sum(go_memstats_mspan_inuse_bytes{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "mspan in use",
+ "refId": "L",
+ "step": 10
+ },
+ {
+ "expr": "sum(go_memstats_mspan_sys_bytes{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "mspan sys",
+ "refId": "M",
+ "step": 10
+ },
+ {
+ "expr": "sum(go_memstats_next_gc_bytes{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "heap next gc",
+ "refId": "N",
+ "step": 10
+ },
+ {
+ "expr": "sum(go_memstats_other_sys_bytes{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "other sys",
+ "refId": "O",
+ "step": 10
+ },
+ {
+ "expr": "sum(go_memstats_stack_inuse_bytes{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "stack in use",
+ "refId": "P",
+ "step": 10
+ },
+ {
+ "expr": "sum(go_memstats_stack_sys_bytes{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "stack sys",
+ "refId": "Q",
+ "step": 10
+ },
+ {
+ "expr": "sum(go_memstats_sys_bytes{job=~\"$job\", instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "sys",
+ "refId": "R",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Go Memory Usage (FIXME)",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "id": 9,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 3,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "prometheus_target_interval_length_seconds{instance=~\"$instance\", job=~\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{quantile}} {{interval}}",
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Scrape Interval Length",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Seconds",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "id": 7,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 3,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(prometheus_target_interval_length_seconds_count{job=~\"$job\",instance=~\"$instance\"}[5m])) by (interval)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{interval}}",
+ "refId": "A",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Target Scrapes / 5m",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Scrapes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Broken, ignore",
+ "titleSize": "h6"
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "selected": false,
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": "datasource",
+ "multi": false,
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": true,
+ "name": "job",
+ "options": [],
+ "query": "query_result(prometheus_tsdb_head_samples_appended_total)",
+ "refresh": 2,
+ "regex": "/.*job=\"([^\"]+)/",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": true,
+ "name": "instance",
+ "options": [],
+ "query": "query_result(up{job=~\"$job\"})",
+ "refresh": 2,
+ "regex": "/.*instance=\"([^\"]+).*/",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+ "selected": true,
+ "text": "1h",
+ "value": "1h"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "interval",
+ "options": [
+ {
+ "selected": true,
+ "text": "1h",
+ "value": "1h"
+ },
+ {
+ "selected": false,
+ "text": "3h",
+ "value": "3h"
+ },
+ {
+ "selected": false,
+ "text": "6h",
+ "value": "6h"
+ },
+ {
+ "selected": false,
+ "text": "12h",
+ "value": "12h"
+ },
+ {
+ "selected": false,
+ "text": "1d",
+ "value": "1d"
+ },
+ {
+ "selected": false,
+ "text": "2d",
+ "value": "2d"
+ },
+ {
+ "selected": false,
+ "text": "7d",
+ "value": "7d"
+ },
+ {
+ "selected": false,
+ "text": "30d",
+ "value": "30d"
+ },
+ {
+ "selected": false,
+ "text": "90d",
+ "value": "90d"
+ },
+ {
+ "selected": false,
+ "text": "180d",
+ "value": "180d"
+ }
+ ],
+ "query": "1h, 3h, 6h, 12h, 1d, 2d, 7d, 30d, 90d, 180d",
+ "type": "custom"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-4h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "Prometheus",
+ "version": 1
+}
diff --git a/terraform-ci-infra/1n_nmd/grafana/main.tf b/terraform-ci-infra/1n_nmd/grafana/main.tf
new file mode 100644
index 0000000000..b67ba03985
--- /dev/null
+++ b/terraform-ci-infra/1n_nmd/grafana/main.tf
@@ -0,0 +1,24 @@
+# Collapses the list of Nomad datacenters into one comma-separated string,
+# which is the form handed to the job template as `datacenters`.
+locals {
+ datacenters = join(",", var.nomad_datacenters)
+}
+
+# Renders the Nomad job specification from conf/nomad/grafana.hcl (relative to
+# this module), substituting the module inputs listed in `vars`.
+# Of var.grafana_vault_secret only `use_vault_provider` is forwarded here.
+# NOTE(review): the hashicorp/template provider behind `template_file` is
+# deprecated in favour of the built-in templatefile() function. Before
+# migrating, check how conf/nomad/grafana.hcl tests use_canary and
+# use_vault_provider: templatefile() keeps bool/number types, whereas this
+# data source presumably hands every value over as a string - confirm.
+data "template_file" "nomad_job_grafana" {
+ template = file("${path.module}/conf/nomad/grafana.hcl")
+ vars = {
+ datacenters = local.datacenters
+ job_name = var.grafana_job_name
+ use_canary = var.grafana_use_canary
+ group_count = var.grafana_group_count
+ service_name = var.grafana_service_name
+ use_vault_provider = var.grafana_vault_secret.use_vault_provider
+ image = var.grafana_container_image
+ cpu = var.grafana_cpu
+ mem = var.grafana_mem
+ port = var.grafana_port
+ }
+}
+
+# Registers the rendered Grafana job specification with Nomad.
+# NOTE(review): per the Nomad provider documentation, `detach = false` should
+# make Terraform wait for the deployment to finish instead of returning right
+# after registration - confirm for the provider version pinned by this repo.
+resource "nomad_job" "nomad_job_grafana" {
+ jobspec = data.template_file.nomad_job_grafana.rendered
+ detach = false
+} \ No newline at end of file
diff --git a/terraform-ci-infra/1n_nmd/grafana/variables.tf b/terraform-ci-infra/1n_nmd/grafana/variables.tf
new file mode 100644
index 0000000000..0c2382b16a
--- /dev/null
+++ b/terraform-ci-infra/1n_nmd/grafana/variables.tf
@@ -0,0 +1,66 @@
+# Nomad
+# main.tf joins this list with commas before handing it to the job template.
+variable "nomad_datacenters" {
+ type = list(string)
+ description = "Nomad data centers"
+ default = ["dc1"]
+}
+
+# Grafana
+# Forwarded to the job template as `job_name`.
+variable "grafana_job_name" {
+ type = string
+ description = "Grafana job name"
+ default = "grafana"
+}
+
+# Forwarded to the job template as `group_count`.
+variable "grafana_group_count" {
+ type = number
+ description = "Number of grafana group instances"
+ default = 1
+}
+
+# Forwarded to the job template as `service_name`.
+variable "grafana_service_name" {
+ type = string
+ description = "Grafana service name"
+ default = "grafana"
+}
+
+# Forwarded to the job template as `image`; the default pins a specific tag.
+variable "grafana_container_image" {
+ type = string
+ description = "Grafana docker image"
+ default = "grafana/grafana:7.3.7"
+}
+
+# Forwarded to the job template as `use_canary`; off unless the caller opts in.
+variable "grafana_use_canary" {
+ type = bool
+ description = "Uses canary deployment"
+ default = false
+}
+
+# Required input: no default is declared, so callers must supply all five
+# attributes. main.tf forwards only `use_vault_provider` to the job template.
+variable "grafana_vault_secret" {
+ type = object({
+ use_vault_provider = bool,
+ vault_kv_policy_name = string,
+ vault_kv_path = string,
+ vault_kv_field_access_key = string,
+ vault_kv_field_secret_key = string
+ })
+ description = "Set of properties to be able to fetch secret from vault"
+}
+
+# Forwarded to the job template as `cpu`.
+# NOTE(review): presumably MHz, as in Nomad's `resources.cpu` - confirm against
+# conf/nomad/grafana.hcl.
+variable "grafana_cpu" {
+ type = number
+ description = "Grafana CPU allocation"
+ default = 2000
+}
+
+# Forwarded to the job template as `mem`.
+# NOTE(review): presumably MB, as in Nomad's `resources.memory` - confirm
+# against conf/nomad/grafana.hcl.
+variable "grafana_mem" {
+ type = number
+ description = "Grafana RAM allocation"
+ default = 8192
+}
+
+# TCP port number forwarded to the job template as `port`.
+# The description previously read "Grafana TCP allocation", carried over from
+# the CPU/RAM variables above; this input is a port number, not a resource
+# quantity.
+variable "grafana_port" {
+ type = number
+ description = "Grafana TCP port"
+ default = 3000
+} \ No newline at end of file