aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorpmikus <pmikus@cisco.com>2021-01-07 14:27:38 +0000
committerPeter Mikus <pmikus@cisco.com>2021-01-29 08:46:14 +0000
commit2856c0a0c19319de146a93c27e106351faf1926c (patch)
treeb09813999df89e4120704d1701633f8547d7b8fa
parentdae934efdf9c773b90cc4a9595d217dd55035561 (diff)
Infra: Monitoring capability
+ Monitoring SOA + Nomad alertmanager job + Nomad prometheus job + Nomad grafana job Signed-off-by: pmikus <pmikus@cisco.com> Change-Id: I0b32e9c87276ba1a2d4a5322816f3473c737eae2 (cherry picked from commit a44eef233de959c5679602dc6cd1ed866d6abc14)
-rw-r--r--resources/tools/testbed-setup/ansible/roles/consul/tasks/main.yaml14
-rw-r--r--resources/tools/testbed-setup/ansible/roles/consul/templates/telemetry.hcl.j23
-rw-r--r--terraform-ci-infra/1n_nmd/alertmanager/conf/nomad/alertmanager.hcl333
-rw-r--r--terraform-ci-infra/1n_nmd/alertmanager/main.tf37
-rw-r--r--terraform-ci-infra/1n_nmd/alertmanager/variables.tf84
-rw-r--r--terraform-ci-infra/1n_nmd/exporter/conf/nomad/exporter.hcl587
-rw-r--r--terraform-ci-infra/1n_nmd/exporter/main.tf64
-rw-r--r--terraform-ci-infra/1n_nmd/exporter/variables.tf76
-rw-r--r--terraform-ci-infra/1n_nmd/grafana/conf/blackbox_exporter_http.json1030
-rw-r--r--terraform-ci-infra/1n_nmd/grafana/conf/blackbox_exporter_icmp.json368
-rw-r--r--terraform-ci-infra/1n_nmd/grafana/conf/consul.json1438
-rw-r--r--terraform-ci-infra/1n_nmd/grafana/conf/docker_cadvisor.json2040
-rw-r--r--terraform-ci-infra/1n_nmd/grafana/conf/node_exporter.json13696
-rw-r--r--terraform-ci-infra/1n_nmd/grafana/conf/nomad.json869
-rw-r--r--terraform-ci-infra/1n_nmd/grafana/conf/nomad/grafana.hcl331
-rw-r--r--terraform-ci-infra/1n_nmd/grafana/conf/prometheus.json3055
-rw-r--r--terraform-ci-infra/1n_nmd/grafana/main.tf24
-rw-r--r--terraform-ci-infra/1n_nmd/grafana/variables.tf66
-rw-r--r--terraform-ci-infra/1n_nmd/main.tf188
-rw-r--r--terraform-ci-infra/1n_nmd/prometheus/conf/nomad/prometheus.hcl569
-rw-r--r--terraform-ci-infra/1n_nmd/prometheus/main.tf37
-rw-r--r--terraform-ci-infra/1n_nmd/prometheus/variables.tf84
-rw-r--r--terraform-ci-infra/1n_nmd/terraform.tfstate829
-rw-r--r--terraform-ci-infra/1n_nmd/terraform.tfstate.backup831
24 files changed, 26651 insertions, 2 deletions
diff --git a/resources/tools/testbed-setup/ansible/roles/consul/tasks/main.yaml b/resources/tools/testbed-setup/ansible/roles/consul/tasks/main.yaml
index 9d1ca19..f87590e 100644
--- a/resources/tools/testbed-setup/ansible/roles/consul/tasks/main.yaml
+++ b/resources/tools/testbed-setup/ansible/roles/consul/tasks/main.yaml
@@ -125,6 +125,16 @@
tags:
- consul-conf
+- name: Conf - Telemetry Configuration
+ template:
+ src: telemetry.hcl.j2
+ dest: "{{ consul_config_dir }}/telemetry.hcl"
+ owner: "{{ consul_user }}"
+ group: "{{ consul_group }}"
+ mode: 0644
+ tags:
+ - consul-conf
+
- name: Conf - Services Configuration
template:
src: services.json.j2
@@ -156,8 +166,8 @@
owner: "root"
group: "root"
mode: 0644
- notify:
- - "Restart Consul"
+# notify:
+# - "Restart Consul"
# - "Stop Systemd-resolved"
# - "Restart Nomad"
tags:
diff --git a/resources/tools/testbed-setup/ansible/roles/consul/templates/telemetry.hcl.j2 b/resources/tools/testbed-setup/ansible/roles/consul/templates/telemetry.hcl.j2
new file mode 100644
index 0000000..ec7fabc
--- /dev/null
+++ b/resources/tools/testbed-setup/ansible/roles/consul/templates/telemetry.hcl.j2
@@ -0,0 +1,3 @@
+telemetry {
+ prometheus_retention_time = "24h"
+} \ No newline at end of file
diff --git a/terraform-ci-infra/1n_nmd/alertmanager/conf/nomad/alertmanager.hcl b/terraform-ci-infra/1n_nmd/alertmanager/conf/nomad/alertmanager.hcl
new file mode 100644
index 0000000..4560cf0
--- /dev/null
+++ b/terraform-ci-infra/1n_nmd/alertmanager/conf/nomad/alertmanager.hcl
@@ -0,0 +1,333 @@
+job "${job_name}" {
+ # The "region" parameter specifies the region in which to execute the job.
+ # If omitted, this inherits the default region name of "global".
+ # region = "global"
+ #
+ # The "datacenters" parameter specifies the list of datacenters which should
+ # be considered when placing this task. This must be provided.
+ datacenters = "${datacenters}"
+
+ # The "type" parameter controls the type of job, which impacts the scheduler's
+ # decision on placement. This configuration is optional and defaults to
+ # "service". For a full list of job types and their differences, please see
+ # the online documentation.
+ #
+ # For more information, please see the online documentation at:
+ #
+ # https://www.nomadproject.io/docs/jobspec/schedulers
+ #
+ type = "service"
+
+ update {
+ # The "max_parallel" parameter specifies the maximum number of updates to
+ # perform in parallel. In this case, this specifies to update a single task
+ # at a time.
+ max_parallel = 1
+
+ health_check = "checks"
+
+ # The "min_healthy_time" parameter specifies the minimum time the allocation
+ # must be in the healthy state before it is marked as healthy and unblocks
+ # further allocations from being updated.
+ min_healthy_time = "10s"
+
+ # The "healthy_deadline" parameter specifies the deadline in which the
+ # allocation must be marked as healthy after which the allocation is
+ # automatically transitioned to unhealthy. Transitioning to unhealthy will
+ # fail the deployment and potentially roll back the job if "auto_revert" is
+ # set to true.
+ healthy_deadline = "3m"
+
+ # The "progress_deadline" parameter specifies the deadline in which an
+ # allocation must be marked as healthy. The deadline begins when the first
+ # allocation for the deployment is created and is reset whenever an allocation
+ # as part of the deployment transitions to a healthy state. If no allocation
+ # transitions to the healthy state before the progress deadline, the
+ # deployment is marked as failed.
+ progress_deadline = "10m"
+
+%{ if use_canary }
+ # The "canary" parameter specifies that changes to the job that would result
+ # in destructive updates should create the specified number of canaries
+ # without stopping any previous allocations. Once the operator determines the
+ # canaries are healthy, they can be promoted which unblocks a rolling update
+ # of the remaining allocations at a rate of "max_parallel".
+ #
+ # Further, setting "canary" equal to the count of the task group allows
+ # blue/green deployments. When the job is updated, a full set of the new
+ # version is deployed and upon promotion the old version is stopped.
+ canary = 1
+
+ # Specifies if the job should auto-promote to the canary version when all
+ # canaries become healthy during a deployment. Defaults to false which means
+ # canaries must be manually updated with the nomad deployment promote
+ # command.
+ auto_promote = true
+
+ # The "auto_revert" parameter specifies if the job should auto-revert to the
+ # last stable job on deployment failure. A job is marked as stable if all the
+ # allocations as part of its deployment were marked healthy.
+ auto_revert = true
+%{ endif }
+ }
+
+ # The "group" stanza defines a series of tasks that should be co-located on
+ # the same Nomad client. Any task within a group will be placed on the same
+ # client.
+ #
+ # For more information and examples on the "group" stanza, please see
+ # the online documentation at:
+ #
+ # https://www.nomadproject.io/docs/job-specification/group
+ #
+ group "prod-group1-${service_name}" {
+ # The "count" parameter specifies the number of the task groups that should
+ # be running under this group. This value must be non-negative and defaults
+ # to 1.
+ count = ${group_count}
+
+ # The constraint allows restricting the set of eligible nodes. Constraints
+ # may filter on attributes or client metadata.
+ #
+ # For more information and examples on the "volume" stanza, please see
+ # the online documentation at:
+ #
+ # https://www.nomadproject.io/docs/job-specification/constraint
+ #
+ constraint {
+ attribute = "$${attr.cpu.arch}"
+ operator = "!="
+ value = "arm64"
+ }
+
+ # The "task" stanza creates an individual unit of work, such as a Docker
+ # container, web application, or batch processing.
+ #
+ # For more information and examples on the "task" stanza, please see
+ # the online documentation at:
+ #
+ # https://www.nomadproject.io/docs/job-specification/task
+ #
+ task "prod-task1-${service_name}" {
+ # The "driver" parameter specifies the task driver that should be used to
+ # run the task.
+ driver = "exec"
+
+ %{ if use_vault_provider }
+ vault {
+ policies = "${vault_kv_policy_name}"
+ }
+ %{ endif }
+
+ # The "config" stanza specifies the driver configuration, which is passed
+ # directly to the driver to start the task. The details of configurations
+ # are specific to each driver, so please see specific driver
+ # documentation for more information.
+ config {
+ command = "local/alertmanager-${version}.linux-amd64/alertmanager"
+ args = [
+ "--config.file=secrets/alertmanager.yml"
+ ]
+ }
+
+ # The artifact stanza instructs Nomad to fetch and unpack a remote resource,
+ # such as a file, tarball, or binary. Nomad downloads artifacts using the
+ # popular go-getter library, which permits downloading artifacts from a
+ # variety of locations using a URL as the input source.
+ #
+ # For more information and examples on the "artifact" stanza, please see
+ # the online documentation at:
+ #
+ # https://www.nomadproject.io/docs/job-specification/artifact
+ #
+ artifact {
+ source = "${url}"
+ }
+
+ # The "template" stanza instructs Nomad to manage a template, such as
+ # a configuration file or script. This template can optionally pull data
+ # from Consul or Vault to populate runtime configuration data.
+ #
+ # For more information and examples on the "template" stanza, please see
+ # the online documentation at:
+ #
+ # https://www.nomadproject.io/docs/job-specification/template
+ #
+ template {
+ change_mode = "noop"
+ change_signal = "SIGINT"
+ destination = "secrets/alertmanager.yml"
+ left_delimiter = "{{{"
+ right_delimiter = "}}}"
+ data = <<EOH
+global:
+ # The API URL to use for Slack notifications.
+ slack_api_url: '${slack_api_url}'
+
+# The directory from which notification templates are read.
+templates:
+- '/etc/alertmanager/template/*.tmpl'
+
+#tls_config:
+# # CA certificate to validate the server certificate with.
+# ca_file: <filepath> ]
+#
+# # Certificate and key files for client cert authentication to the server.
+# cert_file: <filepath>
+# key_file: <filepath>
+#
+# # ServerName extension to indicate the name of the server.
+# # http://tools.ietf.org/html/rfc4366#section-3.1
+# server_name: <string>
+#
+# # Disable validation of the server certificate.
+# insecure_skip_verify: true
+
+# The root route on which each incoming alert enters.
+route:
+ receiver: '${default_receiver}'
+
+ # The labels by which incoming alerts are grouped together. For example,
+ # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
+ # be batched into a single group.
+ #
+ # To aggregate by all possible labels use '...' as the sole label name.
+ # This effectively disables aggregation entirely, passing through all
+ # alerts as-is. This is unlikely to be what you want, unless you have
+ # a very low alert volume or your upstream notification system performs
+ # its own grouping. Example: group_by: [...]
+ group_by: ['alertname', 'cluster', 'service']
+
+ # When a new group of alerts is created by an incoming alert, wait at
+ # least 'group_wait' to send the initial notification.
+ # This way ensures that you get multiple alerts for the same group that start
+ # firing shortly after another are batched together on the first
+ # notification.
+ group_wait: 30s
+
+ # When the first notification was sent, wait 'group_interval' to send a batch
+ # of new alerts that started firing for that group.
+ group_interval: 5m
+
+ # If an alert has successfully been sent, wait 'repeat_interval' to
+ # resend them.
+ repeat_interval: 3h
+
+ # All the above attributes are inherited by all child routes and can
+ # overwritten on each.
+ # The child route trees.
+ routes:
+ # This routes performs a regular expression match on alert labels to
+ # catch alerts that are related to a list of services.
+ - match_re:
+ service: .*
+ receiver: ${default_receiver}
+ # The service has a sub-route for critical alerts, any alerts
+ # that do not match, i.e. severity != critical, fall-back to the
+ # parent node and are sent to 'team-X-mails'
+ routes:
+ - match:
+ severity: critical
+ receiver: '${default_receiver}'
+
+# Inhibition rules allow to mute a set of alerts given that another alert is
+# firing.
+# We use this to mute any warning-level notifications if the same alert is
+# already critical.
+inhibit_rules:
+- source_match:
+ severity: 'critical'
+ target_match:
+ severity: 'warning'
+ # Apply inhibition if the alertname is the same.
+ # CAUTION:
+ # If all label names listed in `equal` are missing
+ # from both the source and target alerts,
+ # the inhibition rule will apply!
+ equal: ['alertname', 'cluster', 'service']
+
+receivers:
+- name: '${default_receiver}'
+ slack_configs:
+ - channel: '#${slack_channel}'
+ send_resolved: true
+ icon_url: https://avatars3.githubusercontent.com/u/3380462
+ title: |-
+ [{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .CommonLabels.alertname }} for {{ .CommonLabels.job }}
+ {{- if gt (len .CommonLabels) (len .GroupLabels) -}}
+ {{" "}}(
+ {{- with .CommonLabels.Remove .GroupLabels.Names }}
+ {{- range $index, $label := .SortedPairs -}}
+ {{ if $index }}, {{ end }}
+ {{- $label.Name }}="{{ $label.Value -}}"
+ {{- end }}
+ {{- end -}}
+ )
+ {{- end }}
+ text: >-
+ {{ range .Alerts -}}
+ *Alert:* {{ .Annotations.summary }}{{ if .Labels.severity }} - `{{ .Labels.severity }}`{{ end }}
+
+ *Description:* {{ .Annotations.description }}
+
+ *Details:*
+ {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}`
+ {{ end }}
+ {{ end }}
+EOH
+ }
+
+ # The service stanza instructs Nomad to register a service with Consul.
+ #
+ # For more information and examples on the "task" stanza, please see
+ # the online documentation at:
+ #
+ # https://www.nomadproject.io/docs/job-specification/service
+ #
+ service {
+ name = "${service_name}"
+ port = "${service_name}"
+ tags = [ "${service_name}$${NOMAD_ALLOC_INDEX}" ]
+ check {
+ name = "Alertmanager Check Live"
+ type = "http"
+ path = "/-/healthy"
+ interval = "10s"
+ timeout = "2s"
+ }
+ }
+
+ # The "resources" stanza describes the requirements a task needs to
+ # execute. Resource requirements include memory, network, cpu, and more.
+ # This ensures the task will execute on a machine that contains enough
+ # resource capacity.
+ #
+ # For more information and examples on the "resources" stanza, please see
+ # the online documentation at:
+ #
+ # https://www.nomadproject.io/docs/job-specification/resources
+ #
+ resources {
+ cpu = ${cpu}
+ memory = ${mem}
+ # The network stanza specifies the networking requirements for the task
+ # group, including the network mode and port allocations. When scheduling
+ # jobs in Nomad they are provisioned across your fleet of machines along
+ # with other jobs and services. Because you don't know in advance what host
+ # your job will be provisioned on, Nomad will provide your tasks with
+ # network configuration when they start up.
+ #
+ # For more information and examples on the "template" stanza, please see
+ # the online documentation at:
+ #
+ # https://www.nomadproject.io/docs/job-specification/network
+ #
+ network {
+ port "${service_name}" {
+ static = ${port}
+ }
+ }
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/terraform-ci-infra/1n_nmd/alertmanager/main.tf b/terraform-ci-infra/1n_nmd/alertmanager/main.tf
new file mode 100644
index 0000000..411c78a
--- /dev/null
+++ b/terraform-ci-infra/1n_nmd/alertmanager/main.tf
@@ -0,0 +1,37 @@
+locals {
+ datacenters = join(",", var.nomad_datacenters)
+
+ alertmanager_url = join("",
+ [
+ "https://github.com",
+ "/prometheus/alertmanager/releases/download/",
+ "v${var.alertmanager_version}/",
+ "alertmanager-${var.alertmanager_version}.linux-amd64.tar.gz"
+ ]
+ )
+}
+
+data "template_file" "nomad_job_alertmanager" {
+ template = file("${path.module}/conf/nomad/alertmanager.hcl")
+ vars = {
+ datacenters = local.datacenters
+ url = local.alertmanager_url
+ job_name = var.alertmanager_job_name
+ use_canary = var.alertmanager_use_canary
+ group_count = var.alertmanager_group_count
+ service_name = var.alertmanager_service_name
+ use_vault_provider = var.alertmanager_vault_secret.use_vault_provider
+ version = var.alertmanager_version
+ cpu = var.alertmanager_cpu
+ mem = var.alertmanager_mem
+ port = var.alertmanager_port
+ slack_api_url = var.alertmanager_slack_api_url
+ slack_channel = var.alertmanager_slack_channel
+ default_receiver = var.alertmanager_default_receiver
+ }
+}
+
+resource "nomad_job" "nomad_job_alertmanager" {
+ jobspec = data.template_file.nomad_job_alertmanager.rendered
+ detach = false
+} \ No newline at end of file
diff --git a/terraform-ci-infra/1n_nmd/alertmanager/variables.tf b/terraform-ci-infra/1n_nmd/alertmanager/variables.tf
new file mode 100644
index 0000000..ebd8621
--- /dev/null
+++ b/terraform-ci-infra/1n_nmd/alertmanager/variables.tf
@@ -0,0 +1,84 @@
+# Nomad
+variable "nomad_datacenters" {
+ description = "Nomad data centers"
+ type = list(string)
+ default = [ "dc1" ]
+}
+
+# Alermanager
+variable "alertmanager_job_name" {
+ description = "Job name"
+ type = string
+ default = "alertmanager"
+}
+
+variable "alertmanager_group_count" {
+ description = "Number of group instances"
+ type = number
+ default = 1
+}
+
+variable "alertmanager_service_name" {
+ description = "Service name"
+ type = string
+ default = "alertmanager"
+}
+
+variable "alertmanager_version" {
+ description = "Version"
+ type = string
+ default = "0.21.0"
+}
+
+variable "alertmanager_use_canary" {
+ description = "Uses canary deployment"
+ type = bool
+ default = false
+}
+
+variable "alertmanager_vault_secret" {
+ description = "Set of properties to be able to fetch secret from vault"
+ type = object({
+ use_vault_provider = bool,
+ vault_kv_policy_name = string,
+ vault_kv_path = string,
+ vault_kv_field_access_key = string,
+ vault_kv_field_secret_key = string
+ })
+}
+
+variable "alertmanager_cpu" {
+ description = "CPU allocation"
+ type = number
+ default = 1000
+}
+
+variable "alertmanager_mem" {
+ description = "RAM allocation"
+ type = number
+ default = 1024
+}
+
+variable "alertmanager_port" {
+ description = "TCP allocation"
+ type = number
+ default = 9093
+}
+
+variable "alertmanager_default_receiver" {
+ description = "Alertmanager default receiver"
+ type = string
+ default = "default-receiver"
+}
+
+variable "alertmanager_slack_api_url" {
+ description = "Alertmanager slack API URL"
+ type = string
+ default = "https://hooks.slack.com/services/XXXXXXXXX/XXXXXXXXXXX/XXXXXXXXXXXXXXXXXXXXXXXX"
+}
+
+variable "alertmanager_slack_channel" {
+ description = "Alertmanager slack channel"
+ type = string
+ default = "slack-channel"
+} \ No newline at end of file
diff --git a/terraform-ci-infra/1n_nmd/exporter/conf/nomad/exporter.hcl b/terraform-ci-infra/1n_nmd/exporter/conf/nomad/exporter.hcl
new file mode 100644
index 0000000..4fd0768
--- /dev/null
+++ b/terraform-ci-infra/1n_nmd/exporter/conf/nomad/exporter.hcl
@@ -0,0 +1,587 @@
+job "${job_name}" {
+ # The "region" parameter specifies the region in which to execute the job.
+ # If omitted, this inherits the default region name of "global".
+ # region = "global"
+ #
+ # The "datacenters" parameter specifies the list of datacenters which should
+ # be considered when placing this task. This must be provided.
+ datacenters = "${datacenters}"
+
+ # The "type" parameter controls the type of job, which impacts the scheduler's
+ # decision on placement. This configuration is optional and defaults to
+ # "service". For a full list of job types and their differences, please see
+ # the online documentation.
+ #
+ # https://www.nomadproject.io/docs/jobspec/schedulers
+ #
+ type = "system"
+
+ update {
+ # The "max_parallel" parameter specifies the maximum number of updates to
+ # perform in parallel. In this case, this specifies to update a single task
+ # at a time.
+ max_parallel = 1
+
+ health_check = "checks"
+
+ # The "min_healthy_time" parameter specifies the minimum time the allocation
+ # must be in the healthy state before it is marked as healthy and unblocks
+ # further allocations from being updated.
+ min_healthy_time = "10s"
+
+ # The "healthy_deadline" parameter specifies the deadline in which the
+ # allocation must be marked as healthy after which the allocation is
+ # automatically transitioned to unhealthy. Transitioning to unhealthy will
+ # fail the deployment and potentially roll back the job if "auto_revert" is
+ # set to true.
+ healthy_deadline = "3m"
+
+ # The "progress_deadline" parameter specifies the deadline in which an
+ # allocation must be marked as healthy. The deadline begins when the first
+ # allocation for the deployment is created and is reset whenever an allocation
+ # as part of the deployment transitions to a healthy state. If no allocation
+ # transitions to the healthy state before the progress deadline, the
+ # deployment is marked as failed.
+ progress_deadline = "10m"
+
+%{ if use_canary }
+ # The "canary" parameter specifies that changes to the job that would result
+ # in destructive updates should create the specified number of canaries
+ # without stopping any previous allocations. Once the operator determines the
+ # canaries are healthy, they can be promoted which unblocks a rolling update
+ # of the remaining allocations at a rate of "max_parallel".
+ #
+ # Further, setting "canary" equal to the count of the task group allows
+ # blue/green deployments. When the job is updated, a full set of the new
+ # version is deployed and upon promotion the old version is stopped.
+ canary = 1
+
+ # Specifies if the job should auto-promote to the canary version when all
+ # canaries become healthy during a deployment. Defaults to false which means
+ # canaries must be manually updated with the nomad deployment promote
+ # command.
+ auto_promote = true
+
+ # The "auto_revert" parameter specifies if the job should auto-revert to the
+ # last stable job on deployment failure. A job is marked as stable if all the
+ # allocations as part of its deployment were marked healthy.
+ auto_revert = true
+%{ endif }
+ }
+
+ # The "group" stanza defines a series of tasks that should be co-located on
+ # the same Nomad client. Any task within a group will be placed on the same
+ # client.
+ #
+ # https://www.nomadproject.io/docs/job-specification/group
+ #
+ group "prod-group1-exporter-amd64" {
+ # The constraint allows restricting the set of eligible nodes. Constraints
+ # may filter on attributes or client metadata.
+ #
+ # https://www.nomadproject.io/docs/job-specification/constraint
+ #
+ constraint {
+ attribute = "$${attr.cpu.arch}"
+ operator = "!="
+ value = "arm64"
+ }
+
+ # The "task" stanza creates an individual unit of work, such as a Docker
+ # container, web application, or batch processing.
+ #
+ # https://www.nomadproject.io/docs/job-specification/task
+ #
+ task "prod-task1-${node_service_name}-amd64" {
+ # The "driver" parameter specifies the task driver that should be used to
+ # run the task.
+ driver = "raw_exec"
+
+ # The "config" stanza specifies the driver configuration, which is passed
+ # directly to the driver to start the task. The details of configurations
+ # are specific to each driver, so please see specific driver
+ # documentation for more information.
+ config {
+ command = "local/node_exporter-${node_version}.linux-amd64/node_exporter"
+ }
+
+ # The artifact stanza instructs Nomad to fetch and unpack a remote resource,
+ # such as a file, tarball, or binary. Nomad downloads artifacts using the
+ # popular go-getter library, which permits downloading artifacts from a
+ # variety of locations using a URL as the input source.
+ #
+ # https://www.nomadproject.io/docs/job-specification/artifact
+ #
+ artifact {
+ source = "${node_url_amd64}"
+ }
+
+ # The service stanza instructs Nomad to register a service with Consul.
+ #
+ # https://www.nomadproject.io/docs/job-specification/service
+ #
+ service {
+ name = "${node_service_name}"
+ port = "${node_service_name}"
+ check {
+ name = "Node Exporter Check Live"
+ type = "http"
+ path = "/metrics"
+ interval = "10s"
+ timeout = "2s"
+ }
+ }
+
+ # The "resources" stanza describes the requirements a task needs to
+ # execute. Resource requirements include memory, network, cpu, and more.
+ # This ensures the task will execute on a machine that contains enough
+ # resource capacity.
+ #
+ # https://www.nomadproject.io/docs/job-specification/resources
+ #
+ resources {
+ cpu = 500
+ # The network stanza specifies the networking requirements for the task
+ # group, including the network mode and port allocations. When scheduling
+ # jobs in Nomad they are provisioned across your fleet of machines along
+ # with other jobs and services. Because you don't know in advance what host
+ # your job will be provisioned on, Nomad will provide your tasks with
+ # network configuration when they start up.
+ #
+ # https://www.nomadproject.io/docs/job-specification/network
+ #
+ network {
+ port "${node_service_name}" {
+ static = ${node_port}
+ }
+ }
+ }
+ }
+ task "prod-task2-${blackbox_service_name}-amd64" {
+ # The "driver" parameter specifies the task driver that should be used to
+ # run the task.
+ driver = "exec"
+
+ # The "config" stanza specifies the driver configuration, which is passed
+ # directly to the driver to start the task. The details of configurations
+ # are specific to each driver, so please see specific driver
+ # documentation for more information.
+ config {
+ command = "local/blackbox_exporter-${blackbox_version}.linux-amd64/blackbox_exporter"
+ args = [
+ "--config.file=secrets/blackbox.yml"
+ ]
+ }
+
+ # The "template" stanza instructs Nomad to manage a template, such as
+ # a configuration file or script. This template can optionally pull data
+ # from Consul or Vault to populate runtime configuration data.
+ #
+ # https://www.nomadproject.io/docs/job-specification/template
+ #
+ template {
+ change_mode = "noop"
+ change_signal = "SIGINT"
+ destination = "secrets/blackbox.yml"
+ data = <<EOH
+modules:
+ http_2xx:
+ prober: http
+ timeout: 5s
+ http:
+ valid_http_versions: ["HTTP/1.1", "HTTP/2.0"]
+ no_follow_redirects: false
+ fail_if_ssl: false
+ fail_if_not_ssl: true
+ tls_config:
+ insecure_skip_verify: false
+ preferred_ip_protocol: "ip4"
+ icmp_v4:
+ prober: icmp
+ timeout: 5s
+ icmp:
+ preferred_ip_protocol: "ip4"
+ dns_udp:
+ prober: dns
+ timeout: 5s
+ dns:
+ query_name: "jenkins.fd.io"
+ query_type: "A"
+ valid_rcodes:
+ - NOERROR
+EOH
+ }
+
+ # The artifact stanza instructs Nomad to fetch and unpack a remote resource,
+ # such as a file, tarball, or binary. Nomad downloads artifacts using the
+ # popular go-getter library, which permits downloading artifacts from a
+ # variety of locations using a URL as the input source.
+ #
+ # https://www.nomadproject.io/docs/job-specification/artifact
+ #
+ artifact {
+ source = "${blackbox_url_amd64}"
+ }
+
+ # The service stanza instructs Nomad to register a service with Consul.
+ #
+ # https://www.nomadproject.io/docs/job-specification/service
+ #
+ service {
+ name = "${blackbox_service_name}"
+ port = "${blackbox_service_name}"
+ tags = [ "${blackbox_service_name}$${NOMAD_ALLOC_INDEX}" ]
+ check {
+ name = "Blackbox Exporter Check Live"
+ type = "http"
+ path = "/metrics"
+ interval = "10s"
+ timeout = "2s"
+ }
+ }
+
+ # The "resources" stanza describes the requirements a task needs to
+ # execute. Resource requirements include memory, network, cpu, and more.
+ # This ensures the task will execute on a machine that contains enough
+ # resource capacity.
+ #
+ # https://www.nomadproject.io/docs/job-specification/resources
+ #
+ resources {
+ cpu = 500
+ # The network stanza specifies the networking requirements for the task
+ # group, including the network mode and port allocations. When scheduling
+ # jobs in Nomad they are provisioned across your fleet of machines along
+ # with other jobs and services. Because you don't know in advance what host
+ # your job will be provisioned on, Nomad will provide your tasks with
+ # network configuration when they start up.
+ #
+ # https://www.nomadproject.io/docs/job-specification/network
+ #
+ network {
+ port "${blackbox_service_name}" {
+ static = ${blackbox_port}
+ }
+ }
+ }
+ }
+
+ task "prod-task3-${cadvisor_service_name}-amd64" {
+ # The "driver" parameter specifies the task driver that should be used to
+ # run the task.
+ driver = "docker"
+
+ # The "config" stanza specifies the driver configuration, which is passed
+ # directly to the driver to start the task. The details of configurations
+ # are specific to each driver, so please see specific driver
+ # documentation for more information.
+ config {
+ image = "${cadvisor_image}"
+ volumes = [
+ "/:/rootfs:ro",
+ "/var/run:/var/run:rw",
+ "/sys:/sys:ro",
+ "/var/lib/docker/:/var/lib/docker:ro",
+ "/cgroup:/cgroup:ro"
+ ]
+ }
+
+ # The service stanza instructs Nomad to register a service with Consul.
+ #
+ # https://www.nomadproject.io/docs/job-specification/service
+ #
+ service {
+ name = "${cadvisor_service_name}"
+ port = "${cadvisor_service_name}"
+ check {
+ name = "cAdvisor Check Live"
+ type = "http"
+ path = "/metrics"
+ interval = "10s"
+ timeout = "2s"
+ }
+ }
+
+ # The "resources" stanza describes the requirements a task needs to
+ # execute. Resource requirements include memory, network, cpu, and more.
+ # This ensures the task will execute on a machine that contains enough
+ # resource capacity.
+ #
+ # https://www.nomadproject.io/docs/job-specification/resources
+ #
+ resources {
+ cpu = 500
+ # The network stanza specifies the networking requirements for the task
+ # group, including the network mode and port allocations. When scheduling
+ # jobs in Nomad they are provisioned across your fleet of machines along
+ # with other jobs and services. Because you don't know in advance what host
+ # your job will be provisioned on, Nomad will provide your tasks with
+ # network configuration when they start up.
+ #
+ # https://www.nomadproject.io/docs/job-specification/network
+ #
+ network {
+ port "${cadvisor_service_name}" {
+ static = ${cadvisor_port}
+ }
+ }
+ }
+ }
+ }
+
+ group "prod-group1-exporter-arm64" {
+ # The constraint allows restricting the set of eligible nodes. Constraints
+ # may filter on attributes or client metadata.
+ #
+ # https://www.nomadproject.io/docs/job-specification/constraint
+ #
+ constraint {
+ attribute = "$${attr.cpu.arch}"
+ operator = "=="
+ value = "arm64"
+ }
+
+ # The "task" stanza creates an individual unit of work, such as a Docker
+ # container, web application, or batch processing.
+ #
+ # https://www.nomadproject.io/docs/job-specification/task
+ #
+ task "prod-task1-${node_service_name}-arm64" {
+ # The "driver" parameter specifies the task driver that should be used to
+ # run the task.
+ driver = "raw_exec"
+
+ # The "config" stanza specifies the driver configuration, which is passed
+ # directly to the driver to start the task. The details of configurations
+ # are specific to each driver, so please see specific driver
+ # documentation for more information.
+ config {
+ command = "local/node_exporter-${node_version}.linux-arm64/node_exporter"
+ }
+
+ # The artifact stanza instructs Nomad to fetch and unpack a remote resource,
+ # such as a file, tarball, or binary. Nomad downloads artifacts using the
+ # popular go-getter library, which permits downloading artifacts from a
+ # variety of locations using a URL as the input source.
+ #
+ # https://www.nomadproject.io/docs/job-specification/artifact
+ #
+ artifact {
+ source = "${node_url_arm64}"
+ }
+
+ # The service stanza instructs Nomad to register a service with Consul.
+ #
+ # https://www.nomadproject.io/docs/job-specification/service
+ #
+ service {
+ name = "${node_service_name}"
+ port = "${node_service_name}"
+ check {
+ name = "Node Exporter Check Live"
+ type = "http"
+ path = "/metrics"
+ interval = "10s"
+ timeout = "2s"
+ }
+ }
+
+ # The "resources" stanza describes the requirements a task needs to
+ # execute. Resource requirements include memory, network, cpu, and more.
+ # This ensures the task will execute on a machine that contains enough
+ # resource capacity.
+ #
+ # https://www.nomadproject.io/docs/job-specification/resources
+ #
+ resources {
+ cpu = 500
+ # The network stanza specifies the networking requirements for the task
+ # group, including the network mode and port allocations. When scheduling
+ # jobs in Nomad they are provisioned across your fleet of machines along
+ # with other jobs and services. Because you don't know in advance what host
+ # your job will be provisioned on, Nomad will provide your tasks with
+ # network configuration when they start up.
+ #
+ # https://www.nomadproject.io/docs/job-specification/network
+ #
+ network {
+ port "${node_service_name}" {
+ static = ${node_port}
+ }
+ }
+ }
+ }
+
+ task "prod-task2-${blackbox_service_name}-arm64" {
+ # The "driver" parameter specifies the task driver that should be used to
+ # run the task.
+ driver = "exec"
+
+ # The "config" stanza specifies the driver configuration, which is passed
+ # directly to the driver to start the task. The details of configurations
+ # are specific to each driver, so please see specific driver
+ # documentation for more information.
+ config {
+ command = "local/blackbox_exporter-${blackbox_version}.linux-arm64/blackbox_exporter"
+ args = [
+ "--config.file=secrets/blackbox.yml"
+ ]
+ }
+
+ # The "template" stanza instructs Nomad to manage a template, such as
+ # a configuration file or script. This template can optionally pull data
+ # from Consul or Vault to populate runtime configuration data.
+ #
+ # https://www.nomadproject.io/docs/job-specification/template
+ #
+ template {
+ change_mode = "noop"
+ change_signal = "SIGINT"
+ destination = "secrets/blackbox.yml"
+ data = <<EOH
+modules:
+ http_2xx:
+ prober: http
+ timeout: 5s
+ http:
+ valid_http_versions: ["HTTP/1.1", "HTTP/2.0"]
+ no_follow_redirects: false
+ fail_if_ssl: false
+ fail_if_not_ssl: true
+ tls_config:
+ insecure_skip_verify: false
+ preferred_ip_protocol: "ip4"
+ icmp_v4:
+ prober: icmp
+ timeout: 5s
+ icmp:
+ preferred_ip_protocol: "ip4"
+ dns_udp:
+ prober: dns
+ timeout: 5s
+ dns:
+ query_name: "jenkins.fd.io"
+ query_type: "A"
+ valid_rcodes:
+ - NOERROR
+EOH
+ }
+
+ # The artifact stanza instructs Nomad to fetch and unpack a remote resource,
+ # such as a file, tarball, or binary. Nomad downloads artifacts using the
+ # popular go-getter library, which permits downloading artifacts from a
+ # variety of locations using a URL as the input source.
+ #
+ # https://www.nomadproject.io/docs/job-specification/artifact
+ #
+ artifact {
+ source = "${blackbox_url_arm64}"
+ }
+
+ # The service stanza instructs Nomad to register a service with Consul.
+ #
+ # https://www.nomadproject.io/docs/job-specification/service
+ #
+ service {
+ name = "${blackbox_service_name}"
+ port = "${blackbox_service_name}"
+ tags = [ "${blackbox_service_name}$${NOMAD_ALLOC_INDEX}" ]
+ check {
+ name = "Blackbox Exporter Check Live"
+ type = "http"
+ path = "/metrics"
+ interval = "10s"
+ timeout = "2s"
+ }
+ }
+
+ # The "resources" stanza describes the requirements a task needs to
+ # execute. Resource requirements include memory, network, cpu, and more.
+ # This ensures the task will execute on a machine that contains enough
+ # resource capacity.
+ #
+ # https://www.nomadproject.io/docs/job-specification/resources
+ #
+ resources {
+ cpu = 500
+ # The network stanza specifies the networking requirements for the task
+ # group, including the network mode and port allocations. When scheduling
+ # jobs in Nomad they are provisioned across your fleet of machines along
+ # with other jobs and services. Because you don't know in advance what host
+ # your job will be provisioned on, Nomad will provide your tasks with
+ # network configuration when they start up.
+ #
+ # https://www.nomadproject.io/docs/job-specification/network
+ #
+ network {
+ port "${blackbox_service_name}" {
+ static = ${blackbox_port}
+ }
+ }
+ }
+ }
+
+ task "prod-task3-${cadvisor_service_name}-arm64" {
+ # The "driver" parameter specifies the task driver that should be used to
+ # run the task.
+ driver = "docker"
+
+ # The "config" stanza specifies the driver configuration, which is passed
+ # directly to the driver to start the task. The details of configurations
+ # are specific to each driver, so please see specific driver
+ # documentation for more information.
+ config {
+ # There is currently no official release for arm yet...using community.
+ image = "zcube/cadvisor:latest"
+ volumes = [
+ "/:/rootfs:ro",
+ "/var/run:/var/run:rw",
+ "/sys:/sys:ro",
+ "/var/lib/docker/:/var/lib/docker:ro",
+ "/cgroup:/cgroup:ro"
+ ]
+ }
+
+ # The service stanza instructs Nomad to register a service with Consul.
+ #
+ # https://www.nomadproject.io/docs/job-specification/service
+ #
+ service {
+ name = "${cadvisor_service_name}"
+ port = "${cadvisor_service_name}"
+ check {
+ name = "cAdvisor Check Live"
+ type = "http"
+ path = "/metrics"
+ interval = "10s"
+ timeout = "2s"
+ }
+ }
+
+ # The "resources" stanza describes the requirements a task needs to
+ # execute. Resource requirements include memory, network, cpu, and more.
+ # This ensures the task will execute on a machine that contains enough
+ # resource capacity.
+ #
+ # https://www.nomadproject.io/docs/job-specification/resources
+ #
+ resources {
+ cpu = 500
+ # The network stanza specifies the networking requirements for the task
+ # group, including the network mode and port allocations. When scheduling
+ # jobs in Nomad they are provisioned across your fleet of machines along
+ # with other jobs and services. Because you don't know in advance what host
+ # your job will be provisioned on, Nomad will provide your tasks with
+ # network configuration when they start up.
+ #
+ # https://www.nomadproject.io/docs/job-specification/network
+ #
+ network {
+ port "${cadvisor_service_name}" {
+ static = ${cadvisor_port}
+ }
+ }
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/terraform-ci-infra/1n_nmd/exporter/main.tf b/terraform-ci-infra/1n_nmd/exporter/main.tf
new file mode 100644
index 0000000..35eb95b
--- /dev/null
+++ b/terraform-ci-infra/1n_nmd/exporter/main.tf
@@ -0,0 +1,64 @@
+locals {
+ datacenters = join(",", var.nomad_datacenters)
+
+ node_url_amd64 = join("",
+ [
+ "https://github.com",
+ "/prometheus/node_exporter/releases/download/",
+ "v${var.node_version}/",
+ "node_exporter-${var.node_version}.linux-amd64.tar.gz"
+ ]
+ )
+ node_url_arm64 = join("",
+ [
+ "https://github.com",
+ "/prometheus/node_exporter/releases/download/",
+ "v${var.node_version}/",
+ "node_exporter-${var.node_version}.linux-arm64.tar.gz"
+ ]
+ )
+
+ blackbox_url_amd64 = join("",
+ [
+ "https://github.com",
+ "/prometheus/blackbox_exporter/releases/download/",
+ "v${var.blackbox_version}/",
+ "blackbox_exporter-${var.blackbox_version}.linux-amd64.tar.gz"
+ ]
+ )
+ blackbox_url_arm64 = join("",
+ [
+ "https://github.com",
+ "/prometheus/blackbox_exporter/releases/download/",
+ "v${var.blackbox_version}/",
+ "blackbox_exporter-${var.blackbox_version}.linux-arm64.tar.gz"
+ ]
+ )
+}
+
+data "template_file" "nomad_job_exporter" {
+ template = file("${path.module}/conf/nomad/exporter.hcl")
+ vars = {
+ datacenters = local.datacenters
+ job_name = var.exporter_job_name
+ use_canary = var.exporter_use_canary
+ node_url_amd64 = local.node_url_amd64
+ node_url_arm64 = local.node_url_arm64
+ node_version = var.node_version
+ node_service_name = var.node_service_name
+ node_port = var.node_port
+ blackbox_url_amd64 = local.blackbox_url_amd64
+ blackbox_url_arm64 = local.blackbox_url_arm64
+ blackbox_version = var.blackbox_version
+ blackbox_service_name = var.blackbox_service_name
+ blackbox_port = var.blackbox_port
+ cadvisor_image = var.cadvisor_image
+ cadvisor_service_name = var.cadvisor_service_name
+ cadvisor_port = var.cadvisor_port
+ }
+}
+
+resource "nomad_job" "nomad_job_exporter" {
+ jobspec = data.template_file.nomad_job_exporter.rendered
+ detach = false
+} \ No newline at end of file
diff --git a/terraform-ci-infra/1n_nmd/exporter/variables.tf b/terraform-ci-infra/1n_nmd/exporter/variables.tf
new file mode 100644
index 0000000..bfa8bd3
--- /dev/null
+++ b/terraform-ci-infra/1n_nmd/exporter/variables.tf
@@ -0,0 +1,76 @@
+# Nomad
+variable "nomad_datacenters" {
+ description = "Nomad data centers"
+ type = list(string)
+ default = [ "dc1" ]
+}
+
+# Exporter
+variable "exporter_job_name" {
+ description = "Exporter job name"
+ type = string
+ default = "exporter"
+}
+
+variable "exporter_use_canary" {
+ description = "Uses canary deployment"
+ type = bool
+ default = false
+}
+
+# Node Exporter
+variable "node_service_name" {
+ description = "Node exporter service name"
+ type = string
+ default = "nodeexporter"
+}
+
+variable "node_version" {
+ description = "Node exporter version"
+ type = string
+ default = "1.0.1"
+}
+
+variable "node_port" {
+ description = "Node exporter TCP allocation"
+ type = number
+ default = 9100
+}
+
+# Blackbox Exporter
+variable "blackbox_service_name" {
+ description = "Blackbox exporter service name"
+ type = string
+ default = "blackboxexporter"
+}
+
+variable "blackbox_version" {
+ description = "Blackbox exporter version"
+ type = string
+ default = "0.18.0"
+}
+
+variable "blackbox_port" {
+ description = "Blackbox exporter TCP allocation"
+ type = number
+ default = 9115
+}
+
+# cAdvisor Exporter
+variable "cadvisor_service_name" {
+ description = "cAdvisor exporter service name"
+ type = string
+ default = "cadvisorexporter"
+}
+
+variable "cadvisor_image" {
+ description = "cAdvisor exporter docker image"
+ type = string
+ default = "gcr.io/cadvisor/cadvisor:v0.38.7"
+}
+
+variable "cadvisor_port" {
+ description = "cAdvisor exporter TCP allocation"
+ type = number
+ default = 8080
+} \ No newline at end of file
diff --git a/terraform-ci-infra/1n_nmd/grafana/conf/blackbox_exporter_http.json b/terraform-ci-infra/1n_nmd/grafana/conf/blackbox_exporter_http.json
new file mode 100644
index 0000000..f9df1b2
--- /dev/null
+++ b/terraform-ci-infra/1n_nmd/grafana/conf/blackbox_exporter_http.json
@@ -0,0 +1,1030 @@
+{
+ "__inputs": [
+ {
+ "name": "DS_PROMETHEUS",
+ "label": "signcl-prometheus",
+ "description": "",
+ "type": "datasource",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus"
+ }
+ ],
+ "__requires": [
+ {
+ "type": "grafana",
+ "id": "grafana",
+ "name": "Grafana",
+ "version": "5.2.2"
+ },
+ {
+ "type": "panel",
+ "id": "graph",
+ "name": "Graph",
+ "version": "5.0.0"
+ },
+ {
+ "type": "datasource",
+ "id": "prometheus",
+ "name": "Prometheus",
+ "version": "5.0.0"
+ },
+ {
+ "type": "panel",
+ "id": "singlestat",
+ "name": "Singlestat",
+ "version": "5.0.0"
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "description": "Prometheus Blackbox Exporter Overview",
+ "editable": true,
+ "gnetId": 7587,
+ "graphTooltip": 0,
+ "id": null,
+ "iteration": 1534695504413,
+ "links": [],
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 8,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 138,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "probe_duration_seconds{instance=~\"$target\"}",
+ "format": "time_series",
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "legendFormat": "{{ instance }}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Global Probe Duration",
+ "tooltip": {
+ "shared": true,
+ "sort": 1,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 8
+ },
+ "id": 15,
+ "panels": [],
+ "repeat": "target",
+ "title": "$target status",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 6,
+ "w": 10,
+ "x": 4,
+ "y": 9
+ },
+ "id": 25,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "probe_http_duration_seconds{instance=~\"$target\"}",
+ "format": "time_series",
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "legendFormat": "{{ phase }}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "HTTP Duration",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 6,
+ "w": 10,
+ "x": 14,
+ "y": 9
+ },
+ "id": 17,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "probe_duration_seconds{instance=~\"$target\"}",
+ "format": "time_series",
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "legendFormat": "seconds",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Probe Duration",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 0,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 4,
+ "x": 0,
+ "y": 11
+ },
+ "id": 20,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "minSpan": 3,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "repeat": null,
+ "repeatDirection": "h",
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "probe_http_status_code{instance=~\"$target\"}",
+ "format": "time_series",
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "201, 399",
+ "title": "HTTP Status Code",
+ "transparent": false,
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ },
+ {
+ "op": "=",
+ "text": "YES",
+ "value": "1"
+ },
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "0"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 4,
+ "x": 0,
+ "y": 13
+ },
+ "id": 27,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "probe_http_version{instance=~\"$target\"}",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "HTTP Version",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "#d44a3a",
+ "rgba(237, 129, 40, 0.89)",
+ "#299c46"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 4,
+ "x": 0,
+ "y": 15
+ },
+ "id": 18,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "minSpan": 3,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "repeat": null,
+ "repeatDirection": "v",
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "probe_http_ssl{instance=~\"$target\"}",
+ "format": "time_series",
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "0, 1",
+ "title": "SSL",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ },
+ {
+ "op": "=",
+ "text": "YES",
+ "value": "1"
+ },
+ {
+ "op": "=",
+ "text": "NO",
+ "value": "0"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "#d44a3a",
+ "rgba(237, 129, 40, 0.89)",
+ "#299c46"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "format": "dtdurations",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 10,
+ "x": 4,
+ "y": 15
+ },
+ "id": 19,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "minSpan": 3,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "repeat": null,
+ "repeatDirection": "h",
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "probe_ssl_earliest_cert_expiry{instance=~\"$target\"} - time()",
+ "format": "time_series",
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "0,1209600",
+ "timeFrom": null,
+ "title": "SSL Expiry",
+ "transparent": false,
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ },
+ {
+ "op": "=",
+ "text": "YES",
+ "value": "1"
+ },
+ {
+ "op": "=",
+ "text": "NO",
+ "value": "0"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "format": "s",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 5,
+ "x": 14,
+ "y": 15
+ },
+ "id": 23,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "repeat": null,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "avg(probe_duration_seconds{instance=~\"$target\"})",
+ "format": "time_series",
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Average Probe Duration",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "format": "s",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 5,
+ "x": 19,
+ "y": 15
+ },
+ "id": 24,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "repeat": null,
+ "repeatDirection": "h",
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "avg(probe_dns_lookup_time_seconds{instance=~\"$target\"})",
+ "format": "time_series",
+ "interval": "$interval",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Average DNS Lookup",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ }
+ ],
+ "refresh": "10s",
+ "schemaVersion": 16,
+ "style": "dark",
+ "tags": [
+ "blackbox",
+ "prometheus"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "selected": false,
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": "datasource",
+ "multi": false,
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "type": "datasource"
+ },
+ {
+ "auto": true,
+ "auto_count": 10,
+ "auto_min": "10s",
+ "current": {
+ "text": "10s",
+ "value": "10s"
+ },
+ "hide": 0,
+ "label": "Interval",
+ "name": "interval",
+ "options": [
+ {
+ "selected": false,
+ "text": "auto",
+ "value": "$__auto_interval_interval"
+ },
+ {
+ "selected": false,
+ "text": "5s",
+ "value": "5s"
+ },
+ {
+ "selected": true,
+ "text": "10s",
+ "value": "10s"
+ },
+ {
+ "selected": false,
+ "text": "30s",
+ "value": "30s"
+ },
+ {
+ "selected": false,
+ "text": "1m",
+ "value": "1m"
+ },
+ {
+ "selected": false,
+ "text": "10m",
+ "value": "10m"
+ },
+ {
+ "selected": false,
+ "text": "30m",
+ "value": "30m"
+ },
+ {
+ "selected": false,
+ "text": "1h",
+ "value": "1h"
+ },
+ {
+ "selected": false,
+ "text": "6h",
+ "value": "6h"
+ },
+ {
+ "selected": false,
+ "text": "12h",
+ "value": "12h"
+ },
+ {
+ "selected": false,
+ "text": "1d",
+ "value": "1d"
+ },
+ {
+ "selected": false,
+ "text": "7d",
+ "value": "7d"
+ },
+ {
+ "selected": false,
+ "text": "14d",
+ "value": "14d"
+ },
+ {
+ "selected": false,
+ "text": "30d",
+ "value": "30d"
+ }
+ ],
+ "query": "5s,10s,30s,1m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
+ "refresh": 2,
+ "type": "interval"
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": true,
+ "name": "target",
+ "options": [],
+ "query": "label_values(probe_success, instance)",
+ "refresh": 1,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-4h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "HTTP Exporter",
+ "version": 1
+} \ No newline at end of file
diff --git a/terraform-ci-infra/1n_nmd/grafana/conf/blackbox_exporter_icmp.json b/terraform-ci-infra/1n_nmd/grafana/conf/blackbox_exporter_icmp.json
new file mode 100644
index 0000000..df30506
--- /dev/null
+++ b/terraform-ci-infra/1n_nmd/grafana/conf/blackbox_exporter_icmp.json
@@ -0,0 +1,368 @@
+{
+ "__inputs": [
+ {
+ "name": "DS_PROMETHEUS",
+ "label": "localhost",
+ "description": "",
+ "type": "datasource",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus"
+ }
+ ],
+ "__requires": [
+ {
+ "type": "grafana",
+ "id": "grafana",
+ "name": "Grafana",
+ "version": "6.5.2"
+ },
+ {
+ "type": "panel",
+ "id": "graph",
+ "name": "Graph",
+ "version": ""
+ },
+ {
+ "type": "panel",
+ "id": "heatmap",
+ "name": "Heatmap",
+ "version": ""
+ },
+ {
+ "type": "datasource",
+ "id": "prometheus",
+ "name": "Prometheus",
+ "version": "1.0.0"
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": 12412,
+ "graphTooltip": 0,
+ "id": null,
+ "iteration": 1591284149575,
+ "links": [],
+ "panels": [
+ {
+ "cards": {
+ "cardPadding": null,
+ "cardRound": null
+ },
+ "color": {
+ "cardColor": "#b4ff00",
+ "colorScale": "sqrt",
+ "colorScheme": "interpolateRdYlGn",
+ "exponent": 0.5,
+ "mode": "spectrum"
+ },
+ "dataFormat": "tsbuckets",
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 8,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "heatmap": {},
+ "hideZeroBuckets": false,
+ "highlightCards": true,
+ "id": 7,
+ "legend": {
+ "show": true
+ },
+ "options": {},
+ "reverseYBuckets": true,
+ "targets": [
+ {
+ "expr": "sum(probe_icmp_duration_seconds{phase=\"rtt\"}) by (instance)",
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "ICMP RTT",
+ "tooltip": {
+ "show": true,
+ "showHistogram": true
+ },
+ "type": "heatmap",
+ "xAxis": {
+ "show": true
+ },
+ "xBucketNumber": null,
+ "xBucketSize": null,
+ "yAxis": {
+ "decimals": null,
+ "format": "s",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true,
+ "splitFactor": null
+ },
+ "yBucketBound": "middle",
+ "yBucketNumber": null,
+ "yBucketSize": null
+ },
+ {
+ "cards": {
+ "cardPadding": null,
+ "cardRound": null
+ },
+ "color": {
+ "cardColor": "#b4ff00",
+ "colorScale": "sqrt",
+ "colorScheme": "interpolateRdYlGn",
+ "exponent": 0.5,
+ "mode": "spectrum"
+ },
+ "dataFormat": "tsbuckets",
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 8,
+ "w": 24,
+ "x": 0,
+ "y": 8
+ },
+ "heatmap": {},
+ "hideZeroBuckets": false,
+ "highlightCards": true,
+ "id": 8,
+ "legend": {
+ "show": true
+ },
+ "options": {},
+ "reverseYBuckets": true,
+ "targets": [
+ {
+ "expr": "1-avg_over_time(probe_success{instance=~\"$instance\"}[$__interval])",
+ "format": "time_series",
+ "hide": false,
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "ICMP packet loss",
+ "tooltip": {
+ "show": true,
+ "showHistogram": true
+ },
+ "type": "heatmap",
+ "xAxis": {
+ "show": true
+ },
+ "xBucketNumber": null,
+ "xBucketSize": null,
+ "yAxis": {
+ "decimals": null,
+ "format": "percentunit",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true,
+ "splitFactor": null
+ },
+ "yBucketBound": "middle",
+ "yBucketNumber": null,
+ "yBucketSize": null
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "description": "This uses the blackbox exporter, which does not expose packet loss, for example. It could be improved with https://github.com/SuperQ/smokeping_prober because it also keeps track of lost samples (https://github.com/SuperQ/smokeping_prober/issues/24). Unfortunately, that still won't make graphs as nice as smokeping, because each probe only keeps one sample, instead of doing multiple like smokeping does (https://github.com/SuperQ/smokeping_prober/issues/36).",
+ "fill": 0,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 24,
+ "x": 0,
+ "y": 16
+ },
+ "hiddenSeries": false,
+ "id": 2,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "maxPerRow": 2,
+ "nullPointMode": "connected",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 0.5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": "instance",
+ "repeatDirection": "v",
+ "seriesOverrides": [
+ {
+ "alias": "packet loss",
+ "color": "#C4162A",
+ "lines": false,
+ "pointradius": 1,
+ "points": true,
+ "yaxis": 2
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": true,
+ "targets": [
+ {
+ "expr": "sum(probe_icmp_duration_seconds{phase=\"rtt\",instance=~\"$instance\"}) by (instance) > 0",
+ "instant": false,
+ "legendFormat": "RTT",
+ "refId": "A"
+ },
+ {
+ "expr": "1-avg_over_time(probe_success{instance=~\"$instance\"}[$__interval])",
+ "format": "time_series",
+ "legendFormat": "packet loss",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "ICMP round trip time ($instance)",
+ "tooltip": {
+ "shared": true,
+ "sort": 1,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "dtdurations",
+ "label": "RTT",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "percentunit",
+ "label": "packet loss",
+ "logBase": 1,
+ "max": "1",
+ "min": "0.0001",
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "refresh": false,
+ "schemaVersion": 21,
+ "style": "dark",
+ "tags": [],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "selected": false,
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": "datasource",
+ "multi": false,
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "definition": "label_values(probe_success, instance)",
+ "hide": 0,
+ "includeAll": true,
+ "label": null,
+ "multi": true,
+ "name": "instance",
+ "options": [],
+ "query": "label_values(probe_success, instance)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-4h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ]
+ },
+ "timezone": "",
+ "title": "ICMP exporter",
+ "version": 1,
+ "description": "Graph ICMP metrics from the blackbox exporter, Smokeping-style"
+} \ No newline at end of file
diff --git a/terraform-ci-infra/1n_nmd/grafana/conf/consul.json b/terraform-ci-infra/1n_nmd/grafana/conf/consul.json
new file mode 100644
index 0000000..2e4a36f
--- /dev/null
+++ b/terraform-ci-infra/1n_nmd/grafana/conf/consul.json
@@ -0,0 +1,1438 @@
+{
+ "__inputs": [
+ {
+ "name": "DS_PROMETHEUS",
+ "label": "Prometheus",
+ "description": "",
+ "type": "datasource",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus"
+ }
+ ],
+ "__requires": [
+ {
+ "type": "grafana",
+ "id": "grafana",
+ "name": "Grafana",
+ "version": "4.3.0-beta1"
+ },
+ {
+ "type": "panel",
+ "id": "graph",
+ "name": "Graph",
+ "version": ""
+ },
+ {
+ "type": "datasource",
+ "id": "prometheus",
+ "name": "Prometheus",
+ "version": "1.0.0"
+ },
+ {
+ "type": "panel",
+ "id": "singlestat",
+ "name": "Singlestat",
+ "version": ""
+ }
+ ],
+ "annotations": {
+ "list": []
+ },
+ "editable": true,
+ "gnetId": 2351,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [],
+ "rows": [
+ {
+ "collapse": false,
+ "height": 153,
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 1,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "consul_raft_leader_lastcontact_count",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{host}}",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "",
+ "title": "Consul Leader",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "name"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "format": "none",
+ "gauge": {
+ "maxValue": 3,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 17,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "COUNT (changes(consul_memberlist_gossep_sum[1m]) > 0) BY (labels)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "1,2",
+ "title": "# servers in cluster",
+ "type": "singlestat",
+ "valueFontSize": "100%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "format": "percent",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 18,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(irate(node_cpu{mode=\"idle\", host=\"$consul\"}[1m])) * 100 / count_scalar(node_cpu{mode=\"user\", host=\"$consul\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "",
+ "title": "CPU Idle",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "format": "none",
+ "gauge": {
+ "maxValue": 4,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 14,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "node_load1{host=\"$consul\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "1,2",
+ "title": "Load 1",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "format": "none",
+ "gauge": {
+ "maxValue": 4,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 15,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "node_load5{host=\"$consul\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "1,2",
+ "title": "Load 5",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "format": "none",
+ "gauge": {
+ "maxValue": 4,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 16,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "node_load15{host=\"$consul\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "1,2",
+ "title": "Load 15",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "The amount of TCP messages that are sent/received from the server.",
+ "fill": 1,
+ "id": 3,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(consul_memberlist_tcp{host=\"$consul\"}[1m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{type}}",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memberlist TCP Messages",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "The amount of UDP messages that are sent/received from the server.",
+ "fill": 1,
+ "id": 5,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(consul_memberlist_udp{host=\"$consul\"}[1m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{type}}",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memberlist UDP Messages",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "This measures the time it takes to replicate log entries to followers. This is a general indicator of the load pressure on the Consul servers, as well as the performance of the communication between the servers.",
+ "fill": 1,
+ "id": 6,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "consul_raft_replication_appendEntries_rpc",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{query}} - {{quantile}}%",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Log replication from leader to servers",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "id": 7,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "consul_raft_replication_heartbeat",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{query}} - {{quantile}}%",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "consul_raft_replication_heartbeat",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "This measures the time it takes for the leader to write log entries to disk.",
+ "fill": 1,
+ "id": 8,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "consul_raft_leader_dispatchLog",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{quantile}}%",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Write logs",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "This measures the time it takes to commit a new entry to the Raft log on the leader.",
+ "fill": 1,
+ "id": 4,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "consul_raft_commitTime",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{quantile}}%",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Commit time Leader",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "This counts the number of Raft transactions occurring over the interval, which is a general indicator of the write load on the Consul servers.",
+ "fill": 1,
+ "id": 9,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "delta(consul_raft_apply[30s])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Transactions",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Raft Transactions",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "This will only be emitted by the Raft leader and measures the time since the leader was last able to contact the follower nodes when checking its leader lease. It can be used as a measure for how stable the Raft timing is and how close the leader is to timing out its lease.\n\nThe lease timeout is 500 ms times the raft_multiplier configuration, so this telemetry value should not be getting close to that configured value, otherwise the Raft timing is marginal and might need to be tuned, or more powerful servers might be needed. See the Server Performance guide for more details.",
+ "fill": 1,
+ "id": 10,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "consul_raft_leader_lastcontact",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{quantile}}%",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Leader lastContact",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 250,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "id": 12,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "delta(consul_rpc_query{host=\"$consul\"}[30s])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Requests",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "RPC Requests",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "Consul uses a network tomography system to compute network coordinates for nodes in the cluster. These coordinates allow the network round trip time to be estimated between any two nodes using a very simple calculation. This allows for many useful applications, such as finding the service node nearest a requesting node, or failing over to services in the next closest datacenter.",
+ "fill": 1,
+ "id": 13,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "consul_serf_coordinate_adjustment_ms{host=\"$consul\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{quantile}}%",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Serf Coordinates",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Dashboard Row",
+ "titleSize": "h6"
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "selected": false,
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": "datasource",
+ "multi": false,
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "consul",
+ "options": [],
+ "query": "label_values(consul_memberlist_gossep_sum, host)",
+ "refresh": 1,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-4h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "Consul",
+ "version": 1
+} \ No newline at end of file
diff --git a/terraform-ci-infra/1n_nmd/grafana/conf/docker_cadvisor.json b/terraform-ci-infra/1n_nmd/grafana/conf/docker_cadvisor.json
new file mode 100644
index 0000000..bbad614
--- /dev/null
+++ b/terraform-ci-infra/1n_nmd/grafana/conf/docker_cadvisor.json
@@ -0,0 +1,2040 @@
+{
+ "__inputs": [
+ {
+ "name": "DS_PROMETHEUS",
+ "label": "Prometheus",
+ "description": "",
+ "type": "datasource",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus"
+ }
+ ],
+ "__requires": [
+ {
+ "type": "grafana",
+ "id": "grafana",
+ "name": "Grafana",
+ "version": "6.2.4"
+ },
+ {
+ "type": "panel",
+ "id": "graph",
+ "name": "Graph",
+ "version": ""
+ },
+ {
+ "type": "datasource",
+ "id": "prometheus",
+ "name": "Prometheus",
+ "version": "1.0.0"
+ },
+ {
+ "type": "panel",
+ "id": "singlestat",
+ "name": "Singlestat",
+ "version": ""
+ },
+ {
+ "type": "panel",
+ "id": "table",
+ "name": "Table",
+ "version": ""
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "description": "A simple overview of the most important Docker host and container metrics. (cAdvisor/Prometheus)",
+ "editable": true,
+ "gnetId": 10657,
+ "graphTooltip": 1,
+ "id": null,
+ "iteration": 1564715574785,
+ "links": [],
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 0,
+ "editable": true,
+ "error": false,
+ "format": "s",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 0,
+ "y": 0
+ },
+ "height": "",
+ "id": 24,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "options": {},
+ "postfix": "",
+ "postfixFontSize": "30%",
+ "prefix": "",
+ "prefixFontSize": "20%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "time() - node_boot_time_seconds{instance=~\"$node:.*\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": "",
+ "title": "Uptime",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "editable": true,
+ "error": false,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 4,
+ "y": 0
+ },
+ "id": 31,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "options": {},
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "count(container_last_seen{instance=~\"$node:$port\",job=~\"$job\",image!=\"\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": "",
+ "title": "Containers",
+ "type": "singlestat",
+ "valueFontSize": "120%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 0,
+ "editable": true,
+ "error": false,
+ "format": "decbytes",
+ "gauge": {
+ "maxValue": 500000000,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 8,
+ "y": 0
+ },
+ "id": 30,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "options": {},
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "(node_memory_SwapTotal_bytes{instance=~'$node:9100'} - node_memory_SwapFree_bytes{instance=~'$node:9100'})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": "400000000",
+ "title": "Swap",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 0,
+ "editable": true,
+ "error": false,
+ "format": "percentunit",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 12,
+ "y": 0
+ },
+ "id": 27,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "options": {},
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(50, 189, 31, 0.18)",
+ "full": false,
+ "lineColor": "rgb(69, 193, 31)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "node_load1{instance=~\"$node:9100\"} / count by(job, instance)(count by(job, instance, cpu)(node_cpu_seconds_total{instance=~\"$node:9100\"}))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": "0.8,0.9",
+ "title": "Load",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ },
+ {
+ "alert": {
+ "conditions": [
+ {
+ "evaluator": {
+ "params": [
+ 10000000000
+ ],
+ "type": "gt"
+ },
+ "query": {
+ "params": [
+ "A",
+ "5m",
+ "now"
+ ]
+ },
+ "reducer": {
+ "params": [],
+ "type": "avg"
+ },
+ "type": "query"
+ }
+ ],
+ "executionErrorState": "alerting",
+ "frequency": "60s",
+ "handler": 1,
+ "name": "Available Memory alert",
+ "noDataState": "keep_state",
+ "notifications": [
+ {
+ "id": 1
+ }
+ ]
+ },
+ "aliasColors": {
+ "Available Memory": "#7EB26D",
+ "Unavailable Memory": "#7EB26D"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "gridPos": {
+ "h": 10,
+ "w": 4,
+ "x": 16,
+ "y": 0
+ },
+ "id": 20,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "container_memory_rss{name=~\".+\"}",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "D",
+ "step": 20
+ },
+ {
+ "expr": "sum(container_memory_rss{name=~\".+\"})",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "A",
+ "step": 20
+ },
+ {
+ "expr": "container_memory_usage_bytes{name=~\".+\"}",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "B",
+ "step": 20
+ },
+ {
+ "expr": "container_memory_rss{id=\"/\"}",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "C",
+ "step": 20
+ },
+ {
+ "expr": "sum(container_memory_rss)",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "E",
+ "step": 20
+ },
+ {
+ "expr": "node_memory_Buffers",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "node_memory_Dirty",
+ "refId": "N",
+ "step": 30
+ },
+ {
+ "expr": "node_memory_MemFree",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "F",
+ "step": 20
+ },
+ {
+ "expr": "node_memory_MemAvailable",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "Available Memory",
+ "refId": "H",
+ "step": 20
+ },
+ {
+ "expr": "node_memory_MemTotal_bytes{instance=~\"$node:9100\"} - node_memory_MemAvailable_bytes{instance=~\"$node:9100\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Unavailable Memory",
+ "refId": "G",
+ "step": 600
+ },
+ {
+ "expr": "node_memory_Inactive",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "I",
+ "step": 30
+ },
+ {
+ "expr": "node_memory_KernelStack",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "J",
+ "step": 30
+ },
+ {
+ "expr": "node_memory_Active",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "K",
+ "step": 30
+ },
+ {
+ "expr": "node_memory_MemTotal - (node_memory_Active + node_memory_MemFree + node_memory_Inactive)",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "Unknown",
+ "refId": "L",
+ "step": 40
+ },
+ {
+ "expr": "node_memory_MemFree + node_memory_Inactive ",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "M",
+ "step": 30
+ },
+ {
+ "expr": "container_memory_rss{name=~\".+\"}",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "O",
+ "step": 30
+ },
+ {
+ "expr": "node_memory_Inactive + node_memory_MemFree + node_memory_MemAvailable",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "P",
+ "step": 40
+ }
+ ],
+ "thresholds": [
+ {
+ "colorMode": "critical",
+ "fill": true,
+ "line": true,
+ "op": "gt",
+ "value": 10000000000,
+ "yaxis": "left"
+ }
+ ],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Available Memory",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": false,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "",
+ "logBase": 1,
+ "max": 16000000000,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "alert": {
+ "conditions": [
+ {
+ "evaluator": {
+ "params": [
+ 850000000000
+ ],
+ "type": "gt"
+ },
+ "query": {
+ "params": [
+ "A",
+ "5m",
+ "now"
+ ]
+ },
+ "reducer": {
+ "params": [],
+ "type": "avg"
+ },
+ "type": "query"
+ }
+ ],
+ "executionErrorState": "alerting",
+ "frequency": "60s",
+ "handler": 1,
+ "name": "Free/Used Disk Space alert",
+ "noDataState": "keep_state",
+ "notifications": [
+ {
+ "id": 1
+ }
+ ]
+ },
+ "aliasColors": {
+ "Belegete Festplatte": "#BF1B00",
+ "Free Disk Space": "#7EB26D",
+ "Used Disk Space": "#7EB26D",
+ "{}": "#BF1B00"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "gridPos": {
+ "h": 10,
+ "w": 4,
+ "x": 20,
+ "y": 0
+ },
+ "id": 13,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "Used Disk Space",
+ "yaxis": 1
+ }
+ ],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_filesystem_size_bytes{fstype=\"rootfs\"} - node_filesystem_free_bytes{fstype=\"rootfs\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Used Disk Space",
+ "refId": "A",
+ "step": 600
+ }
+ ],
+ "thresholds": [
+ {
+ "colorMode": "critical",
+ "fill": true,
+ "line": true,
+ "op": "gt",
+ "value": 850000000000
+ }
+ ],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Used Disk Space",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": false,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "",
+ "logBase": 1,
+ "max": 1000000000000,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "SENT": "#BF1B00"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 0,
+ "y": 4
+ },
+ "id": 19,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "options": {},
+ "percentage": false,
+ "pointradius": 1,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(container_network_receive_bytes_total{id=\"/\"}[$interval])) by (id)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "RECEIVED",
+ "refId": "A",
+ "step": 600
+ },
+ {
+ "expr": "- sum(rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])) by (id)",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "SENT",
+ "refId": "B",
+ "step": 600
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Network Traffic",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": false,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 0,
+ "editable": true,
+ "error": false,
+ "format": "percent",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 4,
+ "y": 4
+ },
+ "id": 25,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "options": {},
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "((node_memory_MemTotal_bytes{instance=~\"$node:9100\"} - node_memory_MemAvailable_bytes{instance=~\"$node:9100\"}) / node_memory_MemTotal_bytes{instance=~\"$node:9100\"}) * 100",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": "70, 90",
+ "title": "Memory",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "aliasColors": {
+ "{id=\"/\",instance=\"cadvisor:8080\",job=\"prometheus\"}": "#BA43A9"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 8,
+ "y": 4
+ },
+ "id": 5,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(container_cpu_system_seconds_total[1m]))",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "a",
+ "refId": "B",
+ "step": 120
+ },
+ {
+ "expr": "sum(rate(container_cpu_system_seconds_total{name=~\".+\"}[1m]))",
+ "format": "time_series",
+ "hide": true,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "nur container",
+ "refId": "F",
+ "step": 10
+ },
+ {
+ "expr": "sum(rate(container_cpu_system_seconds_total{id=\"/\"}[1m]))",
+ "format": "time_series",
+ "hide": true,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "nur docker host",
+ "metric": "",
+ "refId": "A",
+ "step": 20
+ },
+ {
+ "expr": "sum(rate(process_cpu_seconds_total[$interval])) * 100",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "host",
+ "metric": "",
+ "refId": "C",
+ "step": 600
+ },
+ {
+ "expr": "sum(rate(container_cpu_system_seconds_total{name=~\".+\"}[1m])) + sum(rate(container_cpu_system_seconds_total{id=\"/\"}[1m])) + sum(rate(process_cpu_seconds_total[1m]))",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "D",
+ "step": 120
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "CPU Usage",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": false,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percent",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "IN on /sda": "#7EB26D",
+ "OUT on /sda": "#890F02"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 12,
+ "y": 4
+ },
+ "id": 3,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "-sum(rate(node_disk_read_bytes_total[$interval])) by (device)",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "OUT on /{{device}}",
+ "metric": "node_disk_bytes_read",
+ "refId": "A",
+ "step": 600
+ },
+ {
+ "expr": "sum(rate(node_disk_written_bytes_total[$interval])) by (device)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "IN on /{{device}}",
+ "metric": "",
+ "refId": "B",
+ "step": 600
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Disk I/O",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": false,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 10
+ },
+ "id": 8,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(container_network_receive_bytes_total{instance=~\"$node:$port\",job=~\"$job\",image!=\"\"}[$interval])) by (name)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "- rate(container_network_transmit_bytes_total{name=~\".+\"}[$interval])",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "B",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Received Network Traffic per Container",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 10
+ },
+ "id": 9,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(container_network_transmit_bytes_total{instance=~\"$node:$port\",job=~\"$job\",image!=\"\"}[$interval])) by (name)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "B",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Sent Network Traffic per Container",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": "",
+ "logBase": 10,
+ "max": 8,
+ "min": 0,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "editable": true,
+ "error": false,
+ "fill": 5,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 17
+ },
+ "id": 1,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(container_cpu_usage_seconds_total{instance=~\"$node:$port\",job=~\"$job\",image!=\"\"}[$interval])) by (name) * 100",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "metric": "",
+ "refId": "F",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "CPU Usage per Container",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percent",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "editable": true,
+ "error": false,
+ "fill": 3,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 17
+ },
+ "id": 34,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(container_memory_swap{instance=~\"$node:$port\",job=~\"$job\",image!=\"\"}) by (name)",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "container_memory_usage_bytes{name=~\".+\"}",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Swap per Container",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "editable": true,
+ "error": false,
+ "fill": 3,
+ "grid": {},
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 24
+ },
+ "id": 10,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(container_memory_rss{instance=~\"$node:$port\",job=~\"$job\",image!=\"\"}) by (name)",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "container_memory_usage_bytes{name=~\".+\"}",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Usage per Container",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "columns": [
+ {
+ "text": "Current",
+ "value": "current"
+ }
+ ],
+ "editable": true,
+ "error": false,
+ "fontSize": "100%",
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 24
+ },
+ "id": 36,
+ "links": [],
+ "options": {},
+ "pageSize": null,
+ "scroll": true,
+ "showHeader": true,
+ "sort": {
+ "col": 0,
+ "desc": true
+ },
+ "styles": [
+ {
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "decimals": 2,
+ "pattern": "/.*/",
+ "thresholds": [
+ "10000000",
+ " 25000000"
+ ],
+ "type": "number",
+ "unit": "decbytes"
+ }
+ ],
+ "targets": [
+ {
+ "expr": "sum(container_spec_memory_limit_bytes{name=~\".+\"} - container_memory_usage_bytes{name=~\".+\"}) by (name) ",
+ "format": "table",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "metric": "",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "sum(container_spec_memory_limit_bytes{instance=~\"$node:$port\",job=~\"$job\",image!=\"\"}) by (name) ",
+ "format": "table",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "B",
+ "step": 240
+ },
+ {
+ "expr": "container_memory_usage_bytes{name=~\".+\"}",
+ "format": "table",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "C",
+ "step": 240
+ }
+ ],
+ "title": "Limit memory",
+ "transform": "table",
+ "type": "table"
+ }
+ ],
+ "refresh": "5m",
+ "schemaVersion": 18,
+ "style": "dark",
+ "tags": [],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "selected": false,
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": "datasource",
+ "multi": false,
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "definition": "label_values(container_cpu_user_seconds_total, job)",
+ "hide": 0,
+ "includeAll": false,
+ "label": "Job",
+ "multi": false,
+ "name": "job",
+ "options": [],
+ "query": "label_values(container_cpu_user_seconds_total, job)",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "definition": "label_values(container_cpu_user_seconds_total{job=~\"$job\"}, instance)",
+ "hide": 0,
+ "includeAll": false,
+ "label": "Host:",
+ "multi": false,
+ "name": "node",
+ "options": [],
+ "query": "label_values(container_cpu_user_seconds_total{job=~\"$job\"}, instance)",
+ "refresh": 1,
+ "regex": "/([^:]+):.*/",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": null,
+ "tags": [],
+ "tagsQuery": null,
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "definition": "label_values(container_cpu_user_seconds_total{instance=~\"$node:(.*)\"}, instance)",
+ "hide": 0,
+ "includeAll": false,
+ "label": "Port",
+ "multi": false,
+ "name": "port",
+ "options": [],
+ "query": "label_values(container_cpu_user_seconds_total{instance=~\"$node:(.*)\"}, instance)",
+ "refresh": 1,
+ "regex": "/[^:]+:(.*)/",
+ "skipUrlSync": false,
+ "sort": 3,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "auto": true,
+ "auto_count": 30,
+ "auto_min": "50s",
+ "current": {
+ "text": "1m",
+ "value": "1m"
+ },
+ "hide": 0,
+ "label": "Interval",
+ "name": "interval",
+ "options": [
+ {
+ "selected": false,
+ "text": "auto",
+ "value": "$__auto_interval_interval"
+ },
+ {
+ "selected": true,
+ "text": "1m",
+ "value": "1m"
+ },
+ {
+ "selected": false,
+ "text": "5m",
+ "value": "5m"
+ },
+ {
+ "selected": false,
+ "text": "10m",
+ "value": "10m"
+ },
+ {
+ "selected": false,
+ "text": "30m",
+ "value": "30m"
+ },
+ {
+ "selected": false,
+ "text": "1h",
+ "value": "1h"
+ },
+ {
+ "selected": false,
+ "text": "6h",
+ "value": "6h"
+ },
+ {
+ "selected": false,
+ "text": "12h",
+ "value": "12h"
+ },
+ {
+ "selected": false,
+ "text": "1d",
+ "value": "1d"
+ },
+ {
+ "selected": false,
+ "text": "7d",
+ "value": "7d"
+ },
+ {
+ "selected": false,
+ "text": "14d",
+ "value": "14d"
+ },
+ {
+ "selected": false,
+ "text": "30d",
+ "value": "30d"
+ }
+ ],
+ "query": "1m,5m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
+ "refresh": 2,
+ "skipUrlSync": false,
+ "type": "interval"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-4h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "Docker cAdvisor",
+ "version": 1
+}
diff --git a/terraform-ci-infra/1n_nmd/grafana/conf/node_exporter.json b/terraform-ci-infra/1n_nmd/grafana/conf/node_exporter.json
new file mode 100644
index 0000000..766d5af
--- /dev/null
+++ b/terraform-ci-infra/1n_nmd/grafana/conf/node_exporter.json
@@ -0,0 +1,13696 @@
+{
+ "__inputs": [
+ {
+ "name": "DS_PROMETHEUS",
+ "label": "Prometheus",
+ "description": "",
+ "type": "datasource",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus"
+ }
+ ],
+ "__requires": [
+ {
+ "type": "panel",
+ "id": "gauge",
+ "name": "Gauge",
+ "version": ""
+ },
+ {
+ "type": "grafana",
+ "id": "grafana",
+ "name": "Grafana",
+ "version": "6.7.3"
+ },
+ {
+ "type": "panel",
+ "id": "graph",
+ "name": "Graph",
+ "version": ""
+ },
+ {
+ "type": "datasource",
+ "id": "prometheus",
+ "name": "Prometheus",
+ "version": "1.0.0"
+ },
+ {
+ "type": "panel",
+ "id": "singlestat",
+ "name": "Singlestat",
+ "version": ""
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "$$hashKey": "object:1058",
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": 1860,
+ "graphTooltip": 0,
+ "id": null,
+ "iteration": 1595837627257,
+ "links": [],
+ "panels": [
+ {
+ "collapsed": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 261,
+ "panels": [],
+ "repeat": null,
+ "title": "Quick CPU / Mem / Disk",
+ "type": "row"
+ },
+ {
+ "cacheTimeout": null,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "Busy state of all CPU cores together",
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 0,
+ "y": 1
+ },
+ "id": 20,
+ "links": [],
+ "options": {
+ "fieldOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "id": 0,
+ "op": "=",
+ "text": "N/A",
+ "type": 1,
+ "value": "null"
+ }
+ ],
+ "max": 100,
+ "min": 0,
+ "nullValueMode": "null",
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "rgba(50, 172, 45, 0.97)",
+ "value": null
+ },
+ {
+ "color": "rgba(237, 129, 40, 0.89)",
+ "value": 85
+ },
+ {
+ "color": "rgba(245, 54, 54, 0.9)",
+ "value": 95
+ }
+ ]
+ },
+ "unit": "percent"
+ },
+ "overrides": [],
+ "values": false
+ },
+ "orientation": "horizontal",
+ "showThresholdLabels": false,
+ "showThresholdMarkers": true
+ },
+ "pluginVersion": "6.7.3",
+ "targets": [
+ {
+ "expr": "(((count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))) - avg(sum by (mode)(irate(node_cpu_seconds_total{mode='idle',instance=\"$node\",job=\"$job\"}[5m])))) * 100) / count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))",
+ "hide": false,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 900
+ }
+ ],
+ "title": "CPU Busy",
+ "type": "gauge"
+ },
+ {
+ "cacheTimeout": null,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "Busy state of all CPU cores together (5 min average)",
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 3,
+ "y": 1
+ },
+ "id": 155,
+ "links": [],
+ "options": {
+ "fieldOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "id": 0,
+ "op": "=",
+ "text": "N/A",
+ "type": 1,
+ "value": "null"
+ }
+ ],
+ "max": 100,
+ "min": 0,
+ "nullValueMode": "null",
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "rgba(50, 172, 45, 0.97)",
+ "value": null
+ },
+ {
+ "color": "rgba(237, 129, 40, 0.89)",
+ "value": 85
+ },
+ {
+ "color": "rgba(245, 54, 54, 0.9)",
+ "value": 95
+ }
+ ]
+ },
+ "unit": "percent"
+ },
+ "overrides": [],
+ "values": false
+ },
+ "orientation": "horizontal",
+ "showThresholdLabels": false,
+ "showThresholdMarkers": true
+ },
+ "pluginVersion": "6.7.3",
+ "targets": [
+ {
+ "expr": "avg(node_load5{instance=\"$node\",job=\"$job\"}) / count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)) * 100",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 900
+ }
+ ],
+ "title": "Sys Load (5m avg)",
+ "type": "gauge"
+ },
+ {
+ "cacheTimeout": null,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "Busy state of all CPU cores together (15 min average)",
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 6,
+ "y": 1
+ },
+ "id": 19,
+ "links": [],
+ "options": {
+ "fieldOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "id": 0,
+ "op": "=",
+ "text": "N/A",
+ "type": 1,
+ "value": "null"
+ }
+ ],
+ "max": 100,
+ "min": 0,
+ "nullValueMode": "null",
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "rgba(50, 172, 45, 0.97)",
+ "value": null
+ },
+ {
+ "color": "rgba(237, 129, 40, 0.89)",
+ "value": 85
+ },
+ {
+ "color": "rgba(245, 54, 54, 0.9)",
+ "value": 95
+ }
+ ]
+ },
+ "unit": "percent"
+ },
+ "overrides": [],
+ "values": false
+ },
+ "orientation": "horizontal",
+ "showThresholdLabels": false,
+ "showThresholdMarkers": true
+ },
+ "pluginVersion": "6.7.3",
+ "targets": [
+ {
+ "expr": "avg(node_load15{instance=\"$node\",job=\"$job\"}) / count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)) * 100",
+ "hide": false,
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 900
+ }
+ ],
+ "title": "Sys Load (15m avg)",
+ "type": "gauge"
+ },
+ {
+ "cacheTimeout": null,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "Non available RAM memory",
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 9,
+ "y": 1
+ },
+ "hideTimeOverride": false,
+ "id": 16,
+ "links": [],
+ "options": {
+ "fieldOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "decimals": 0,
+ "mappings": [],
+ "max": 100,
+ "min": 0,
+ "nullValueMode": "null",
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "rgba(50, 172, 45, 0.97)",
+ "value": null
+ },
+ {
+ "color": "rgba(237, 129, 40, 0.89)",
+ "value": 80
+ },
+ {
+ "color": "rgba(245, 54, 54, 0.9)",
+ "value": 90
+ }
+ ]
+ },
+ "unit": "percent"
+ },
+ "overrides": [],
+ "values": false
+ },
+ "orientation": "horizontal",
+ "showThresholdLabels": false,
+ "showThresholdMarkers": true
+ },
+ "pluginVersion": "6.7.3",
+ "targets": [
+ {
+ "expr": "((node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"}) / (node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} )) * 100",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 900
+ },
+ {
+ "expr": "100 - ((node_memory_MemAvailable_bytes{instance=\"$node\",job=\"$job\"} * 100) / node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"})",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 1,
+ "refId": "B",
+ "step": 900
+ }
+ ],
+ "title": "RAM Used",
+ "type": "gauge"
+ },
+ {
+ "cacheTimeout": null,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "Used Swap",
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 12,
+ "y": 1
+ },
+ "id": 21,
+ "links": [],
+ "options": {
+ "fieldOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "id": 0,
+ "op": "=",
+ "text": "N/A",
+ "type": 1,
+ "value": "null"
+ }
+ ],
+ "max": 100,
+ "min": 0,
+ "nullValueMode": "null",
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "rgba(50, 172, 45, 0.97)",
+ "value": null
+ },
+ {
+ "color": "rgba(237, 129, 40, 0.89)",
+ "value": 10
+ },
+ {
+ "color": "rgba(245, 54, 54, 0.9)",
+ "value": 25
+ }
+ ]
+ },
+ "unit": "percent"
+ },
+ "overrides": [],
+ "values": false
+ },
+ "orientation": "horizontal",
+ "showThresholdLabels": false,
+ "showThresholdMarkers": true
+ },
+ "pluginVersion": "6.7.3",
+ "targets": [
+ {
+ "expr": "((node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"}) / (node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} )) * 100",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 900
+ }
+ ],
+ "title": "SWAP Used",
+ "type": "gauge"
+ },
+ {
+ "cacheTimeout": null,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "Used Root FS",
+ "gridPos": {
+ "h": 4,
+ "w": 3,
+ "x": 15,
+ "y": 1
+ },
+ "id": 154,
+ "links": [],
+ "options": {
+ "fieldOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [
+ {
+ "id": 0,
+ "op": "=",
+ "text": "N/A",
+ "type": 1,
+ "value": "null"
+ }
+ ],
+ "max": 100,
+ "min": 0,
+ "nullValueMode": "null",
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "rgba(50, 172, 45, 0.97)",
+ "value": null
+ },
+ {
+ "color": "rgba(237, 129, 40, 0.89)",
+ "value": 80
+ },
+ {
+ "color": "rgba(245, 54, 54, 0.9)",
+ "value": 90
+ }
+ ]
+ },
+ "unit": "percent"
+ },
+ "overrides": [],
+ "values": false
+ },
+ "orientation": "horizontal",
+ "showThresholdLabels": false,
+ "showThresholdMarkers": true
+ },
+ "pluginVersion": "6.7.3",
+ "targets": [
+ {
+ "expr": "100 - ((node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"} * 100) / node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"})",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 900
+ }
+ ],
+ "title": "Root FS Used",
+ "type": "gauge"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "Total number of CPU cores",
+ "format": "short",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 2,
+ "x": 18,
+ "y": 1
+ },
+ "id": 14,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A",
+ "step": 900
+ }
+ ],
+ "thresholds": "",
+ "title": "CPU Cores",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 1,
+ "description": "System uptime",
+ "format": "s",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 4,
+ "x": 20,
+ "y": 1
+ },
+ "hideTimeOverride": true,
+ "id": 15,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "$$hashKey": "object:1094",
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "$$hashKey": "object:1095",
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "null",
+ "nullText": null,
+ "postfix": "s",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "node_time_seconds{instance=\"$node\",job=\"$job\"} - node_boot_time_seconds{instance=\"$node\",job=\"$job\"}",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 1800
+ }
+ ],
+ "thresholds": "",
+ "title": "Uptime",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "$$hashKey": "object:1097",
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 0,
+ "description": "Total RootFS",
+ "format": "bytes",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 2,
+ "x": 18,
+ "y": 3
+ },
+ "id": 23,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 900
+ }
+ ],
+ "thresholds": "70,90",
+ "title": "RootFS Total",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 0,
+ "description": "Total RAM",
+ "format": "bytes",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 2,
+ "x": 20,
+ "y": 3
+ },
+ "id": 75,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "70%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"}",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 900
+ }
+ ],
+ "thresholds": "",
+ "title": "RAM Total",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 0,
+ "description": "Total SWAP",
+ "format": "bytes",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 2,
+ "w": 2,
+ "x": 22,
+ "y": 3
+ },
+ "id": 18,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "70%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"}",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 900
+ }
+ ],
+ "thresholds": "",
+ "title": "SWAP Total",
+ "type": "singlestat",
+ "valueFontSize": "50%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "collapsed": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 5
+ },
+ "id": 263,
+ "panels": [],
+ "repeat": null,
+ "title": "Basic CPU / Mem / Net / Disk",
+ "type": "row"
+ },
+ {
+ "aliasColors": {
+ "Busy": "#EAB839",
+ "Busy Iowait": "#890F02",
+ "Busy other": "#1F78C1",
+ "Idle": "#052B51",
+ "Idle - Waiting for something to happen": "#052B51",
+ "guest": "#9AC48A",
+ "idle": "#052B51",
+ "iowait": "#EAB839",
+ "irq": "#BF1B00",
+ "nice": "#C15C17",
+ "softirq": "#E24D42",
+ "steal": "#FCE2DE",
+ "system": "#508642",
+ "user": "#5195CE"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "description": "Basic CPU info",
+ "fill": 4,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 6
+ },
+ "hiddenSeries": false,
+ "id": 77,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 250,
+ "sort": null,
+ "sortDesc": null,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": true,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "Busy Iowait",
+ "color": "#890F02"
+ },
+ {
+ "alias": "Idle",
+ "color": "#7EB26D"
+ },
+ {
+ "alias": "Busy System",
+ "color": "#EAB839"
+ },
+ {
+ "alias": "Busy User",
+ "color": "#0A437C"
+ },
+ {
+ "alias": "Busy Other",
+ "color": "#6D1F62"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by (instance)(irate(node_cpu_seconds_total{mode=\"system\",instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Busy System",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "sum by (instance)(irate(node_cpu_seconds_total{mode='user',instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Busy User",
+ "refId": "B",
+ "step": 240
+ },
+ {
+ "expr": "sum by (instance)(irate(node_cpu_seconds_total{mode='iowait',instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Busy Iowait",
+ "refId": "C",
+ "step": 240
+ },
+ {
+ "expr": "sum by (instance)(irate(node_cpu_seconds_total{mode=~\".*irq\",instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Busy IRQs",
+ "refId": "D",
+ "step": 240
+ },
+ {
+ "expr": "sum (irate(node_cpu_seconds_total{mode!='idle',mode!='user',mode!='system',mode!='iowait',mode!='irq',mode!='softirq',instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Busy Other",
+ "refId": "E",
+ "step": 240
+ },
+ {
+ "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='idle',instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Idle",
+ "refId": "F",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "CPU Basic",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "",
+ "logBase": 1,
+ "max": "100",
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Apps": "#629E51",
+ "Buffers": "#614D93",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Free": "#0A437C",
+ "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF",
+ "Inactive": "#584477",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "RAM_Free": "#E0F9D7",
+ "SWAP Used": "#BF1B00",
+ "Slab": "#806EB7",
+ "Slab_Cache": "#E0752D",
+ "Swap": "#BF1B00",
+ "Swap Used": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Swap_Free": "#2F575E",
+ "Unused": "#EAB839"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "description": "Basic memory usage",
+ "fill": 4,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 6
+ },
+ "hiddenSeries": false,
+ "id": 78,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 350,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "RAM Total",
+ "color": "#E0F9D7",
+ "fill": 0,
+ "stack": false
+ },
+ {
+ "alias": "RAM Cache + Buffer",
+ "color": "#052B51"
+ },
+ {
+ "alias": "RAM Free",
+ "color": "#7EB26D"
+ },
+ {
+ "alias": "Avaliable",
+ "color": "#DEDAF7",
+ "fill": 0,
+ "stack": false
+ }
+ ],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "RAM Total",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"} - (node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} + node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"})",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "RAM Used",
+ "refId": "B",
+ "step": 240
+ },
+ {
+ "expr": "node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} + node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "RAM Cache + Buffer",
+ "refId": "C",
+ "step": 240
+ },
+ {
+ "expr": "node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "RAM Free",
+ "refId": "D",
+ "step": 240
+ },
+ {
+ "expr": "(node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "SWAP Used",
+ "refId": "E",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Basic",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Recv_bytes_eth2": "#7EB26D",
+ "Recv_bytes_lo": "#0A50A1",
+ "Recv_drop_eth2": "#6ED0E0",
+ "Recv_drop_lo": "#E0F9D7",
+ "Recv_errs_eth2": "#BF1B00",
+ "Recv_errs_lo": "#CCA300",
+ "Trans_bytes_eth2": "#7EB26D",
+ "Trans_bytes_lo": "#0A50A1",
+ "Trans_drop_eth2": "#6ED0E0",
+ "Trans_drop_lo": "#E0F9D7",
+ "Trans_errs_eth2": "#BF1B00",
+ "Trans_errs_lo": "#CCA300",
+ "recv_bytes_lo": "#0A50A1",
+ "recv_drop_eth0": "#99440A",
+ "recv_drop_lo": "#967302",
+ "recv_errs_eth0": "#BF1B00",
+ "recv_errs_lo": "#890F02",
+ "trans_bytes_eth0": "#7EB26D",
+ "trans_bytes_lo": "#0A50A1",
+ "trans_drop_eth0": "#99440A",
+ "trans_drop_lo": "#967302",
+ "trans_errs_eth0": "#BF1B00",
+ "trans_errs_lo": "#890F02"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "Basic network info per interface",
+ "fill": 4,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 13
+ },
+ "hiddenSeries": false,
+ "id": 74,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*trans.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_network_receive_bytes_total{instance=\"$node\",job=\"$job\"}[5m])*8",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "recv {{device}}",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "irate(node_network_transmit_bytes_total{instance=\"$node\",job=\"$job\"}[5m])*8",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "trans {{device}} ",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Network Traffic Basic",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "pps",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 3,
+ "description": "Disk space used of all filesystems mounted",
+ "fill": 4,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 12,
+ "y": 13
+ },
+ "height": "",
+ "hiddenSeries": false,
+ "id": 152,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "100 - ((node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'} * 100) / node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{mountpoint}}",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Disk Space Used Basic",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percent",
+ "label": null,
+ "logBase": 1,
+ "max": "100",
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 20
+ },
+ "id": 265,
+ "panels": [
+ {
+ "aliasColors": {
+ "Idle - Waiting for something to happen": "#052B51",
+ "guest": "#9AC48A",
+ "idle": "#052B51",
+ "iowait": "#EAB839",
+ "irq": "#BF1B00",
+ "nice": "#C15C17",
+ "softirq": "#E24D42",
+ "steal": "#FCE2DE",
+ "system": "#508642",
+ "user": "#5195CE"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "description": "",
+ "fill": 4,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 12,
+ "w": 12,
+ "x": 0,
+ "y": 21
+ },
+ "hiddenSeries": false,
+ "id": 3,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 250,
+ "sort": null,
+ "sortDesc": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": true,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode=\"system\",instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "interval": "10s",
+ "intervalFactor": 2,
+ "legendFormat": "System - Processes executing in kernel mode",
+ "refId": "A",
+ "step": 20
+ },
+ {
+ "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='user',instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "User - Normal processes executing in user mode",
+ "refId": "B",
+ "step": 240
+ },
+ {
+ "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='nice',instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Nice - Niced processes executing in user mode",
+ "refId": "C",
+ "step": 240
+ },
+ {
+ "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='idle',instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Idle - Waiting for something to happen",
+ "refId": "D",
+ "step": 240
+ },
+ {
+ "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='iowait',instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Iowait - Waiting for I/O to complete",
+ "refId": "E",
+ "step": 240
+ },
+ {
+ "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='irq',instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Irq - Servicing interrupts",
+ "refId": "F",
+ "step": 240
+ },
+ {
+ "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='softirq',instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Softirq - Servicing softirqs",
+ "refId": "G",
+ "step": 240
+ },
+ {
+ "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='steal',instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Steal - Time spent in other operating systems when running in a virtualized environment",
+ "refId": "H",
+ "step": 240
+ },
+ {
+ "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='guest',instance=\"$node\",job=\"$job\"}[5m])) * 100",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Guest - Time spent running a virtual CPU for a guest operating system",
+ "refId": "I",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "CPU",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "percentage",
+ "logBase": 1,
+ "max": "100",
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Apps": "#629E51",
+ "Buffers": "#614D93",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Free": "#0A437C",
+ "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF",
+ "Inactive": "#584477",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "RAM_Free": "#E0F9D7",
+ "Slab": "#806EB7",
+ "Slab_Cache": "#E0752D",
+ "Swap": "#BF1B00",
+ "Swap - Swap memory usage": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Swap_Free": "#2F575E",
+ "Unused": "#EAB839",
+ "Unused - Free memory unassigned": "#052B51"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "description": "",
+ "fill": 4,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 12,
+ "w": 12,
+ "x": 12,
+ "y": 21
+ },
+ "hiddenSeries": false,
+ "id": 24,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 350,
+ "sort": null,
+ "sortDesc": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Hardware Corrupted - *./",
+ "stack": false
+ }
+ ],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Slab_bytes{instance=\"$node\",job=\"$job\"} - node_memory_PageTables_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapCached_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Apps - Memory used by user-space applications",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_memory_PageTables_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "PageTables - Memory used to map between virtual and physical memory addresses",
+ "refId": "B",
+ "step": 240
+ },
+ {
+ "expr": "node_memory_SwapCached_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "SwapCache - Memory that keeps track of pages that have been fetched from swap but not yet been modified",
+ "refId": "C",
+ "step": 240
+ },
+ {
+ "expr": "node_memory_Slab_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Slab - Memory used by the kernel to cache data structures for its own use (caches like inode, dentry, etc)",
+ "refId": "D",
+ "step": 240
+ },
+ {
+ "expr": "node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Cache - Parked file data (file content) cache",
+ "refId": "E",
+ "step": 240
+ },
+ {
+ "expr": "node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Buffers - Block device (e.g. harddisk) cache",
+ "refId": "F",
+ "step": 240
+ },
+ {
+ "expr": "node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Unused - Free memory unassigned",
+ "refId": "G",
+ "step": 240
+ },
+ {
+ "expr": "(node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"})",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Swap - Swap space used",
+ "refId": "H",
+ "step": 240
+ },
+ {
+ "expr": "node_memory_HardwareCorrupted_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working",
+ "refId": "I",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Stack",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "receive_packets_eth0": "#7EB26D",
+ "receive_packets_lo": "#E24D42",
+ "transmit_packets_eth0": "#7EB26D",
+ "transmit_packets_lo": "#E24D42"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 4,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 12,
+ "w": 12,
+ "x": 0,
+ "y": 33
+ },
+ "hiddenSeries": false,
+ "id": 84,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:5871",
+ "alias": "/.*Trans.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_network_receive_bytes_total{instance=\"$node\",job=\"$job\"}[5m])*8",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Receive",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "irate(node_network_transmit_bytes_total{instance=\"$node\",job=\"$job\"}[5m])*8",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Transmit",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Network Traffic",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:5884",
+ "format": "bps",
+ "label": "bits out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:5885",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 3,
+ "description": "",
+ "fill": 4,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 12,
+ "w": 12,
+ "x": 12,
+ "y": 33
+ },
+ "height": "",
+ "hiddenSeries": false,
+ "id": 156,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": false,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'} - node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{mountpoint}}",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Disk Space Used",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 12,
+ "w": 12,
+ "x": 0,
+ "y": 45
+ },
+ "hiddenSeries": false,
+ "id": 229,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Read.*/",
+ "transform": "negative-Y"
+ },
+ {
+ "alias": "/.*sda_.*/",
+ "color": "#7EB26D"
+ },
+ {
+ "alias": "/.*sdb_.*/",
+ "color": "#EAB839"
+ },
+ {
+ "alias": "/.*sdc_.*/",
+ "color": "#6ED0E0"
+ },
+ {
+ "alias": "/.*sdd_.*/",
+ "color": "#EF843C"
+ },
+ {
+ "alias": "/.*sde_.*/",
+ "color": "#E24D42"
+ },
+ {
+ "alias": "/.*sda1.*/",
+ "color": "#584477"
+ },
+ {
+ "alias": "/.*sda2_.*/",
+ "color": "#BA43A9"
+ },
+ {
+ "alias": "/.*sda3_.*/",
+ "color": "#F4D598"
+ },
+ {
+ "alias": "/.*sdb1.*/",
+ "color": "#0A50A1"
+ },
+ {
+ "alias": "/.*sdb2.*/",
+ "color": "#BF1B00"
+ },
+ {
+ "alias": "/.*sdb2.*/",
+ "color": "#BF1B00"
+ },
+ {
+ "alias": "/.*sdb3.*/",
+ "color": "#E0752D"
+ },
+ {
+ "alias": "/.*sdc1.*/",
+ "color": "#962D82"
+ },
+ {
+ "alias": "/.*sdc2.*/",
+ "color": "#614D93"
+ },
+ {
+ "alias": "/.*sdc3.*/",
+ "color": "#9AC48A"
+ },
+ {
+ "alias": "/.*sdd1.*/",
+ "color": "#65C5DB"
+ },
+ {
+ "alias": "/.*sdd2.*/",
+ "color": "#F9934E"
+ },
+ {
+ "alias": "/.*sdd3.*/",
+ "color": "#EA6460"
+ },
+ {
+ "alias": "/.*sde1.*/",
+ "color": "#E0F9D7"
+ },
+ {
+ "alias": "/.*sdd2.*/",
+ "color": "#FCEACA"
+ },
+ {
+ "alias": "/.*sde3.*/",
+ "color": "#F9E2D2"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_disk_reads_completed_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[5m])",
+ "intervalFactor": 4,
+ "legendFormat": "{{device}} - Reads completed",
+ "refId": "A",
+ "step": 480
+ },
+ {
+ "expr": "irate(node_disk_writes_completed_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[5m])",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Writes completed",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Disk IOps",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "iops",
+ "label": "IO read (-) / write (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "io time": "#890F02"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 3,
+ "description": "",
+ "fill": 4,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 12,
+ "w": 12,
+ "x": 12,
+ "y": 45
+ },
+ "hiddenSeries": false,
+ "id": 42,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": null,
+ "sortDesc": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*read*./",
+ "transform": "negative-Y"
+ },
+ {
+ "alias": "/.*sda.*/",
+ "color": "#7EB26D"
+ },
+ {
+ "alias": "/.*sdb.*/",
+ "color": "#EAB839"
+ },
+ {
+ "alias": "/.*sdc.*/",
+ "color": "#6ED0E0"
+ },
+ {
+ "alias": "/.*sdd.*/",
+ "color": "#EF843C"
+ },
+ {
+ "alias": "/.*sde.*/",
+ "color": "#E24D42"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_disk_read_bytes_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[5m])",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Successfully read bytes",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "irate(node_disk_written_bytes_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[5m])",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Successfully written bytes",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "I/O Usage Read / Write",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": false,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes read (-) / write (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "ms",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "io time": "#890F02"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 3,
+ "description": "",
+ "fill": 4,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 12,
+ "w": 12,
+ "x": 0,
+ "y": 57
+ },
+ "hiddenSeries": false,
+ "id": 127,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": null,
+ "sortDesc": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_disk_io_time_seconds_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"} [5m])",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Time spent doing I/Os",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "I/O Usage Times",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": false,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": "time",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "s",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "repeat": null,
+ "title": "CPU / Memory / Net / Disk",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 21
+ },
+ "id": 266,
+ "panels": [
+ {
+ "aliasColors": {
+ "Apps": "#629E51",
+ "Buffers": "#614D93",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Free": "#0A437C",
+ "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF",
+ "Inactive": "#584477",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "RAM_Free": "#E0F9D7",
+ "Slab": "#806EB7",
+ "Slab_Cache": "#E0752D",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Swap_Free": "#2F575E",
+ "Unused": "#EAB839"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 70
+ },
+ "hiddenSeries": false,
+ "id": 136,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 350,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 2,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_Inactive_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Inactive - Memory which has been less recently used. It is more eligible to be reclaimed for other purposes",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_Active_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Active - Memory that has been used more recently and usually not reclaimed unless absolutely necessary",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Active / Inactive",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Apps": "#629E51",
+ "Buffers": "#614D93",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Free": "#0A437C",
+ "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF",
+ "Inactive": "#584477",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "RAM_Free": "#E0F9D7",
+ "Slab": "#806EB7",
+ "Slab_Cache": "#E0752D",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Swap_Free": "#2F575E",
+ "Unused": "#EAB839"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 70
+ },
+ "hiddenSeries": false,
+ "id": 135,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 350,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Committed_AS - *./"
+ },
+ {
+ "alias": "/.*CommitLimit - *./",
+ "color": "#BF1B00",
+ "fill": 0
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_Committed_AS_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Committed_AS - Amount of memory presently allocated on the system",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_CommitLimit_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "CommitLimit - Amount of memory currently available to be allocated on the system",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Commited",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Apps": "#629E51",
+ "Buffers": "#614D93",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Free": "#0A437C",
+ "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF",
+ "Inactive": "#584477",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "RAM_Free": "#E0F9D7",
+ "Slab": "#806EB7",
+ "Slab_Cache": "#E0752D",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Swap_Free": "#2F575E",
+ "Unused": "#EAB839"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 80
+ },
+ "hiddenSeries": false,
+ "id": 191,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 350,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_Inactive_file_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Inactive_file - File-backed memory on inactive LRU list",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_Inactive_anon_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Inactive_anon - Anonymous and swap cache on inactive LRU list, including tmpfs (shmem)",
+ "refId": "B",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_Active_file_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Active_file - File-backed memory on active LRU list",
+ "refId": "C",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_Active_anon_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Active_anon - Anonymous and swap cache on active least-recently-used (LRU) list, including tmpfs",
+ "refId": "D",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Active / Inactive Detail",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Active": "#99440A",
+ "Buffers": "#58140C",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Dirty": "#6ED0E0",
+ "Free": "#B7DBAB",
+ "Inactive": "#EA6460",
+ "Mapped": "#052B51",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "Slab_Cache": "#EAB839",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Total": "#511749",
+ "Total RAM": "#052B51",
+ "Total RAM + Swap": "#052B51",
+ "Total Swap": "#614D93",
+ "VmallocUsed": "#EA6460"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 80
+ },
+ "hiddenSeries": false,
+ "id": 130,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 2,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_Writeback_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Writeback - Memory which is actively being written back to disk",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_WritebackTmp_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "WritebackTmp - Memory used by FUSE for temporary writeback buffers",
+ "refId": "B",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_Dirty_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Dirty - Memory which is waiting to get written back to the disk",
+ "refId": "C",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Writeback and Dirty",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Apps": "#629E51",
+ "Buffers": "#614D93",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Free": "#0A437C",
+ "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF",
+ "Inactive": "#584477",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "RAM_Free": "#E0F9D7",
+ "Slab": "#806EB7",
+ "Slab_Cache": "#E0752D",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Swap_Free": "#2F575E",
+ "Unused": "#EAB839"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 90
+ },
+ "hiddenSeries": false,
+ "id": 138,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 350,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:4131",
+ "alias": "ShmemHugePages - Memory used by shared memory (shmem) and tmpfs allocated with huge pages",
+ "fill": 0
+ },
+ {
+ "$$hashKey": "object:4138",
+ "alias": "ShmemHugePages - Memory used by shared memory (shmem) and tmpfs allocated with huge pages",
+ "fill": 0
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_Mapped_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Mapped - Used memory in mapped pages files which have been mmaped, such as libraries",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_Shmem_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Shmem - Used shared memory (shared between several processes, thus including RAM disks)",
+ "refId": "B",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_ShmemHugePages_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "ShmemHugePages - Memory used by shared memory (shmem) and tmpfs allocated with huge pages",
+ "refId": "C",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_ShmemPmdMapped_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "ShmemPmdMapped - Ammount of shared (shmem/tmpfs) memory backed by huge pages",
+ "refId": "D",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Shared and Mapped",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:4106",
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:4107",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Active": "#99440A",
+ "Buffers": "#58140C",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Dirty": "#6ED0E0",
+ "Free": "#B7DBAB",
+ "Inactive": "#EA6460",
+ "Mapped": "#052B51",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "Slab_Cache": "#EAB839",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Total": "#511749",
+ "Total RAM": "#052B51",
+ "Total RAM + Swap": "#052B51",
+ "Total Swap": "#614D93",
+ "VmallocUsed": "#EA6460"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 90
+ },
+ "hiddenSeries": false,
+ "id": 131,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 2,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_SUnreclaim_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "SUnreclaim - Part of Slab, that cannot be reclaimed on memory pressure",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_SReclaimable_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "SReclaimable - Part of Slab, that might be reclaimed, such as caches",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Slab",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Active": "#99440A",
+ "Buffers": "#58140C",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Dirty": "#6ED0E0",
+ "Free": "#B7DBAB",
+ "Inactive": "#EA6460",
+ "Mapped": "#052B51",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "Slab_Cache": "#EAB839",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Total": "#511749",
+ "Total RAM": "#052B51",
+ "Total RAM + Swap": "#052B51",
+ "VmallocUsed": "#EA6460"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 100
+ },
+ "hiddenSeries": false,
+ "id": 70,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_VmallocChunk_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "VmallocChunk - Largest contigious block of vmalloc area which is free",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_VmallocTotal_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "VmallocTotal - Total size of vmalloc memory area",
+ "refId": "B",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_VmallocUsed_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "VmallocUsed - Amount of vmalloc area which is used",
+ "refId": "C",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Vmalloc",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Apps": "#629E51",
+ "Buffers": "#614D93",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Free": "#0A437C",
+ "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF",
+ "Inactive": "#584477",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "RAM_Free": "#E0F9D7",
+ "Slab": "#806EB7",
+ "Slab_Cache": "#E0752D",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Swap_Free": "#2F575E",
+ "Unused": "#EAB839"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 100
+ },
+ "hiddenSeries": false,
+ "id": 159,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 350,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_Bounce_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Bounce - Memory used for block device bounce buffers",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Bounce",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Active": "#99440A",
+ "Buffers": "#58140C",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Dirty": "#6ED0E0",
+ "Free": "#B7DBAB",
+ "Inactive": "#EA6460",
+ "Mapped": "#052B51",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "Slab_Cache": "#EAB839",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Total": "#511749",
+ "Total RAM": "#052B51",
+ "Total RAM + Swap": "#052B51",
+ "VmallocUsed": "#EA6460"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 110
+ },
+ "hiddenSeries": false,
+ "id": 129,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Inactive *./",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_AnonHugePages_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "AnonHugePages - Memory in anonymous huge pages",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_AnonPages_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "AnonPages - Memory in user pages not backed by files",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Anonymous",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Apps": "#629E51",
+ "Buffers": "#614D93",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Free": "#0A437C",
+ "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF",
+ "Inactive": "#584477",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "RAM_Free": "#E0F9D7",
+ "Slab": "#806EB7",
+ "Slab_Cache": "#E0752D",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Swap_Free": "#2F575E",
+ "Unused": "#EAB839"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 110
+ },
+ "hiddenSeries": false,
+ "id": 160,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 350,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 2,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_KernelStack_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "KernelStack - Kernel memory stack. This is not reclaimable",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_Percpu_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "PerCPU - Per CPU memory allocated dynamically by loadable modules",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Kernel / CPU",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Active": "#99440A",
+ "Buffers": "#58140C",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Dirty": "#6ED0E0",
+ "Free": "#B7DBAB",
+ "Inactive": "#EA6460",
+ "Mapped": "#052B51",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "Slab_Cache": "#EAB839",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Total": "#511749",
+ "Total RAM": "#806EB7",
+ "Total RAM + Swap": "#806EB7",
+ "VmallocUsed": "#EA6460"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 120
+ },
+ "hiddenSeries": false,
+ "id": 140,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_HugePages_Free{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "HugePages_Free - Huge pages in the pool that are not yet allocated",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_HugePages_Rsvd{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "HugePages_Rsvd - Huge pages for which a commitment to allocate from the pool has been made, but no allocation has yet been made",
+ "refId": "B",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_HugePages_Surp{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "HugePages_Surp - Huge pages in the pool above the value in /proc/sys/vm/nr_hugepages",
+ "refId": "C",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory HugePages Counter",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "pages",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Active": "#99440A",
+ "Buffers": "#58140C",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Dirty": "#6ED0E0",
+ "Free": "#B7DBAB",
+ "Inactive": "#EA6460",
+ "Mapped": "#052B51",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "Slab_Cache": "#EAB839",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Total": "#511749",
+ "Total RAM": "#806EB7",
+ "Total RAM + Swap": "#806EB7",
+ "VmallocUsed": "#EA6460"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 120
+ },
+ "hiddenSeries": false,
+ "id": 71,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 2,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_HugePages_Total{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "HugePages - Total size of the pool of huge pages",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_Hugepagesize_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Hugepagesize - Huge Page size",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory HugePages Size",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Active": "#99440A",
+ "Buffers": "#58140C",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Dirty": "#6ED0E0",
+ "Free": "#B7DBAB",
+ "Inactive": "#EA6460",
+ "Mapped": "#052B51",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "Slab_Cache": "#EAB839",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Total": "#511749",
+ "Total RAM": "#052B51",
+ "Total RAM + Swap": "#052B51",
+ "VmallocUsed": "#EA6460"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 130
+ },
+ "hiddenSeries": false,
+ "id": 128,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_DirectMap1G_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "DirectMap1G - Amount of pages mapped as this size",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_DirectMap2M_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "DirectMap2M - Amount of pages mapped as this size",
+ "refId": "B",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_DirectMap4k_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "DirectMap4K - Amount of pages mapped as this size",
+ "refId": "C",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory DirectMap",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Apps": "#629E51",
+ "Buffers": "#614D93",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Free": "#0A437C",
+ "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF",
+ "Inactive": "#584477",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "RAM_Free": "#E0F9D7",
+ "Slab": "#806EB7",
+ "Slab_Cache": "#E0752D",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Swap_Free": "#2F575E",
+ "Unused": "#EAB839"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 130
+ },
+ "hiddenSeries": false,
+ "id": 137,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 350,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_Unevictable_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Unevictable - Amount of unevictable memory that can't be swapped out for a variety of reasons",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_memory_Mlocked_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "MLocked - Size of pages locked to memory using the mlock() system call",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Unevictable and MLocked",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Active": "#99440A",
+ "Buffers": "#58140C",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Dirty": "#6ED0E0",
+ "Free": "#B7DBAB",
+ "Inactive": "#EA6460",
+ "Mapped": "#052B51",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "Slab_Cache": "#EAB839",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Total": "#511749",
+ "Total RAM": "#052B51",
+ "Total RAM + Swap": "#052B51",
+ "Total Swap": "#614D93",
+ "VmallocUsed": "#EA6460"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 140
+ },
+ "hiddenSeries": false,
+ "id": 132,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_NFS_Unstable_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "NFS Unstable - Memory in NFS pages sent to the server, but not yet commited to the storage",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory NFS",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "repeat": null,
+ "title": "Memory Meminfo",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 22
+ },
+ "id": 267,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 23
+ },
+ "hiddenSeries": false,
+ "id": 176,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*out/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_vmstat_pgpgin{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Pagesin - Page in operations",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_vmstat_pgpgout{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Pagesout - Page out operations",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Pages In / Out",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "pages out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 23
+ },
+ "hiddenSeries": false,
+ "id": 22,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*out/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_vmstat_pswpin{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Pswpin - Pages swapped in",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_vmstat_pswpout{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Pswpout - Pages swapped out",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Pages Swap In / Out",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "pages out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Apps": "#629E51",
+ "Buffers": "#614D93",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Free": "#0A437C",
+ "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF",
+ "Inactive": "#584477",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "RAM_Free": "#E0F9D7",
+ "Slab": "#806EB7",
+ "Slab_Cache": "#E0752D",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Swap_Free": "#2F575E",
+ "Unused": "#EAB839"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 33
+ },
+ "hiddenSeries": false,
+ "id": 175,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 350,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:6118",
+ "alias": "Pgfault - Page major and minor fault operations",
+ "fill": 0,
+ "stack": false
+ }
+ ],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_vmstat_pgfault{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Pgfault - Page major and minor fault operations",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_vmstat_pgmajfault{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Pgmajfault - Major page fault operations",
+ "refId": "B",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_vmstat_pgfault{instance=\"$node\",job=\"$job\"}[5m]) - irate(node_vmstat_pgmajfault{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Pgminfault - Minor page fault operations",
+ "refId": "C",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Memory Page Faults",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:6133",
+ "format": "short",
+ "label": "faults",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:6134",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "Active": "#99440A",
+ "Buffers": "#58140C",
+ "Cache": "#6D1F62",
+ "Cached": "#511749",
+ "Committed": "#508642",
+ "Dirty": "#6ED0E0",
+ "Free": "#B7DBAB",
+ "Inactive": "#EA6460",
+ "Mapped": "#052B51",
+ "PageTables": "#0A50A1",
+ "Page_Tables": "#0A50A1",
+ "Slab_Cache": "#EAB839",
+ "Swap": "#BF1B00",
+ "Swap_Cache": "#C15C17",
+ "Total": "#511749",
+ "Total RAM": "#052B51",
+ "Total RAM + Swap": "#052B51",
+ "Total Swap": "#614D93",
+ "VmallocUsed": "#EA6460"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 2,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 33
+ },
+ "hiddenSeries": false,
+ "id": 307,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": null,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_vmstat_oom_kill{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "oom killer invocations ",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "OOM Killer",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:5373",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:5374",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "repeat": null,
+ "title": "Memory Vmstat",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 23
+ },
+ "id": 293,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 24
+ },
+ "hiddenSeries": false,
+ "id": 260,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Variation*./",
+ "color": "#890F02"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_timex_estimated_error_seconds{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Estimated error in seconds",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_timex_offset_seconds{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Time offset in between local system and reference clock",
+ "refId": "B",
+ "step": 240
+ },
+ {
+ "expr": "node_timex_maxerror_seconds{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Maximum error in seconds",
+ "refId": "C",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Time Syncronized Drift",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": "seconds",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 24
+ },
+ "hiddenSeries": false,
+ "id": 291,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_timex_loop_time_constant{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Phase-locked loop time adjust",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Time PLL Adjust",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 34
+ },
+ "hiddenSeries": false,
+ "id": 168,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Variation*./",
+ "color": "#890F02"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_timex_sync_status{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Is clock synchronized to a reliable server (1 = yes, 0 = no)",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_timex_frequency_adjustment_ratio{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Local clock frequency adjustment",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Time Syncronized Status",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 34
+ },
+ "hiddenSeries": false,
+ "id": 294,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_timex_tick_seconds{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Seconds between clock ticks",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_timex_tai_offset_seconds{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "International Atomic Time (TAI) offset",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Time Misc",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": "seconds",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "title": "System Timesync",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 24
+ },
+ "id": 312,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 7
+ },
+ "hiddenSeries": false,
+ "id": 62,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_procs_blocked{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Processes blocked waiting for I/O to complete",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_procs_running{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Processes in runnable state",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Processes Status",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:6500",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:6501",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 7
+ },
+ "hiddenSeries": false,
+ "id": 315,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_processes_state{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{ state }}",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Processes State",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:6500",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:6501",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 17
+ },
+ "hiddenSeries": false,
+ "id": 148,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_forks_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Processes forks second",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Processes Forks",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:6640",
+ "format": "short",
+ "label": "forks / sec",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:6641",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 17
+ },
+ "hiddenSeries": false,
+ "id": 149,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Max.*/",
+ "fill": 0
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(process_virtual_memory_bytes{instance=\"$node\",job=\"$job\"}[5m])",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Processes virtual memory size in bytes",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "process_resident_memory_max_bytes{instance=\"$node\",job=\"$job\"}",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Maximum amount of virtual memory available in bytes",
+ "refId": "B",
+ "step": 240
+ },
+ {
+ "expr": "irate(process_virtual_memory_bytes{instance=\"$node\",job=\"$job\"}[5m])",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Processes virtual memory size in bytes",
+ "refId": "C",
+ "step": 240
+ },
+ {
+ "expr": "irate(process_virtual_memory_max_bytes{instance=\"$node\",job=\"$job\"}[5m])",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Maximum amount of virtual memory available in bytes",
+ "refId": "D",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Processes Memory",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "decbytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 27
+ },
+ "hiddenSeries": false,
+ "id": 313,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:709",
+ "alias": "PIDs limit",
+ "color": "#F2495C",
+ "fill": 0
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_processes_pids{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Number of PIDs",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_processes_max_processes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "PIDs limit",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "PIDs Number and Limit",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:6500",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:6501",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 27
+ },
+ "hiddenSeries": false,
+ "id": 305,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:4963",
+ "alias": "/.*waiting.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_schedstat_running_seconds_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "CPU {{ cpu }} - seconds spent running a process",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "irate(node_schedstat_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "CPU {{ cpu }} - seconds spent by processing waiting for this CPU",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Process schedule stats Running / Waiting",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:4860",
+ "format": "s",
+ "label": "seconds",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:4861",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 37
+ },
+ "hiddenSeries": false,
+ "id": 314,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:709",
+ "alias": "Threads limit",
+ "color": "#F2495C",
+ "fill": 0
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_processes_threads{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Allocated threads",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_processes_max_threads{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Threads limit",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Threads Number and Limit",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:6500",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:6501",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "title": "System Processes",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 25
+ },
+ "id": 269,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 8
+ },
+ "hiddenSeries": false,
+ "id": 8,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_context_switches_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Context switches",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "irate(node_intr_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Interrupts",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Context Switches / Interrupts",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 8
+ },
+ "hiddenSeries": false,
+ "id": 7,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_load1{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 4,
+ "legendFormat": "Load 1m",
+ "refId": "A",
+ "step": 480
+ },
+ {
+ "expr": "node_load5{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 4,
+ "legendFormat": "Load 5m",
+ "refId": "B",
+ "step": 480
+ },
+ {
+ "expr": "node_load15{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 4,
+ "legendFormat": "Load 15m",
+ "refId": "C",
+ "step": 480
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "System Load",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:6261",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:6262",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 18
+ },
+ "hiddenSeries": false,
+ "id": 259,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Critical*./",
+ "color": "#E24D42",
+ "fill": 0
+ },
+ {
+ "alias": "/.*Max*./",
+ "color": "#EF843C",
+ "fill": 0
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_interrupts_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{ type }} - {{ info }}",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Interrupts Detail",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 18
+ },
+ "hiddenSeries": false,
+ "id": 306,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_schedstat_timeslices_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "CPU {{ cpu }}",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Schedule timeslices executed by each cpu",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:4860",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:4861",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 28
+ },
+ "hiddenSeries": false,
+ "id": 151,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_entropy_available_bits{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Entropy available to random number generators",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Entropy",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:6568",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:6569",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 28
+ },
+ "hiddenSeries": false,
+ "id": 308,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(process_cpu_seconds_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Time spent",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "CPU time spent in user and system contexts",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:4860",
+ "format": "s",
+ "label": "seconds",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:4861",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 38
+ },
+ "hiddenSeries": false,
+ "id": 64,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:6323",
+ "alias": "/.*Max*./",
+ "color": "#890F02",
+ "fill": 0
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "process_max_fds{instance=\"$node\",job=\"$job\"}",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Maximum open file descriptors",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "process_open_fds{instance=\"$node\",job=\"$job\"}",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Open file descriptors",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "File Descriptors",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:6338",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:6339",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "repeat": null,
+ "title": "System Misc",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 26
+ },
+ "id": 304,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 26
+ },
+ "hiddenSeries": false,
+ "id": 158,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:6726",
+ "alias": "/.*Critical*./",
+ "color": "#E24D42",
+ "fill": 0
+ },
+ {
+ "$$hashKey": "object:6727",
+ "alias": "/.*Max*./",
+ "color": "#EF843C",
+ "fill": 0
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_hwmon_temp_celsius{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{ chip }} {{ sensor }} temp",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_hwmon_temp_crit_alarm_celsius{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": true,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{ chip }} {{ sensor }} Critical Alarm",
+ "refId": "B",
+ "step": 240
+ },
+ {
+ "expr": "node_hwmon_temp_crit_celsius{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{ chip }} {{ sensor }} Critical",
+ "refId": "C",
+ "step": 240
+ },
+ {
+ "expr": "node_hwmon_temp_crit_hyst_celsius{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": true,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{ chip }} {{ sensor }} Critical Historical",
+ "refId": "D",
+ "step": 240
+ },
+ {
+ "expr": "node_hwmon_temp_max_celsius{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": true,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{ chip }} {{ sensor }} Max",
+ "refId": "E",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Hardware temperature monitor",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:6750",
+ "format": "celsius",
+ "label": "temperature",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:6751",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 26
+ },
+ "hiddenSeries": false,
+ "id": 300,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:1655",
+ "alias": "/.*Max*./",
+ "color": "#EF843C",
+ "fill": 0
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_cooling_device_cur_state{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Current {{ name }} in {{ type }}",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_cooling_device_max_state{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Max {{ name }} in {{ type }}",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Throttle cooling device",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:1678",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:1679",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 36
+ },
+ "hiddenSeries": false,
+ "id": 302,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_power_supply_online{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{ power_supply }} online",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Power supply",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:1678",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:1679",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "title": "Hardware Misc",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 27
+ },
+ "id": 296,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 10
+ },
+ "hiddenSeries": false,
+ "id": 297,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_systemd_socket_accepted_connections_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{ name }} Connections",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Systemd Sockets",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 10
+ },
+ "hiddenSeries": false,
+ "id": 298,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "Failed",
+ "color": "#F2495C"
+ },
+ {
+ "alias": "Inactive",
+ "color": "#FF9830"
+ },
+ {
+ "alias": "Active",
+ "color": "#73BF69"
+ },
+ {
+ "alias": "Deactivating",
+ "color": "#FFCB7D"
+ },
+ {
+ "alias": "Activating",
+ "color": "#C8F2C2"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"activating\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Activating",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"active\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Active",
+ "refId": "B",
+ "step": 240
+ },
+ {
+ "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"deactivating\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Deactivating",
+ "refId": "C",
+ "step": 240
+ },
+ {
+ "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"failed\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Failed",
+ "refId": "D",
+ "step": 240
+ },
+ {
+ "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"inactive\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Inactive",
+ "refId": "E",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Systemd Units State",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "title": "Systemd",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 28
+ },
+ "id": 270,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 29
+ },
+ "hiddenSeries": false,
+ "id": 9,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": null,
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:2033",
+ "alias": "/.*Read.*/",
+ "transform": "negative-Y"
+ },
+ {
+ "$$hashKey": "object:2034",
+ "alias": "/.*sda_.*/",
+ "color": "#7EB26D"
+ },
+ {
+ "$$hashKey": "object:2035",
+ "alias": "/.*sdb_.*/",
+ "color": "#EAB839"
+ },
+ {
+ "$$hashKey": "object:2036",
+ "alias": "/.*sdc_.*/",
+ "color": "#6ED0E0"
+ },
+ {
+ "$$hashKey": "object:2037",
+ "alias": "/.*sdd_.*/",
+ "color": "#EF843C"
+ },
+ {
+ "$$hashKey": "object:2038",
+ "alias": "/.*sde_.*/",
+ "color": "#E24D42"
+ },
+ {
+ "$$hashKey": "object:2039",
+ "alias": "/.*sda1.*/",
+ "color": "#584477"
+ },
+ {
+ "$$hashKey": "object:2040",
+ "alias": "/.*sda2_.*/",
+ "color": "#BA43A9"
+ },
+ {
+ "$$hashKey": "object:2041",
+ "alias": "/.*sda3_.*/",
+ "color": "#F4D598"
+ },
+ {
+ "$$hashKey": "object:2042",
+ "alias": "/.*sdb1.*/",
+ "color": "#0A50A1"
+ },
+ {
+ "$$hashKey": "object:2043",
+ "alias": "/.*sdb2.*/",
+ "color": "#BF1B00"
+ },
+ {
+ "$$hashKey": "object:2044",
+ "alias": "/.*sdb3.*/",
+ "color": "#E0752D"
+ },
+ {
+ "$$hashKey": "object:2045",
+ "alias": "/.*sdc1.*/",
+ "color": "#962D82"
+ },
+ {
+ "$$hashKey": "object:2046",
+ "alias": "/.*sdc2.*/",
+ "color": "#614D93"
+ },
+ {
+ "$$hashKey": "object:2047",
+ "alias": "/.*sdc3.*/",
+ "color": "#9AC48A"
+ },
+ {
+ "$$hashKey": "object:2048",
+ "alias": "/.*sdd1.*/",
+ "color": "#65C5DB"
+ },
+ {
+ "$$hashKey": "object:2049",
+ "alias": "/.*sdd2.*/",
+ "color": "#F9934E"
+ },
+ {
+ "$$hashKey": "object:2050",
+ "alias": "/.*sdd3.*/",
+ "color": "#EA6460"
+ },
+ {
+ "$$hashKey": "object:2051",
+ "alias": "/.*sde1.*/",
+ "color": "#E0F9D7"
+ },
+ {
+ "$$hashKey": "object:2052",
+ "alias": "/.*sdd2.*/",
+ "color": "#FCEACA"
+ },
+ {
+ "$$hashKey": "object:2053",
+ "alias": "/.*sde3.*/",
+ "color": "#F9E2D2"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_disk_reads_completed_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "intervalFactor": 4,
+ "legendFormat": "{{device}} - Reads completed",
+ "refId": "A",
+ "step": 8
+ },
+ {
+ "expr": "irate(node_disk_writes_completed_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Writes completed",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Disk IOps Completed",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:2186",
+ "format": "iops",
+ "label": "IO read (-) / write (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:2187",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 29
+ },
+ "hiddenSeries": false,
+ "id": 33,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Read.*/",
+ "transform": "negative-Y"
+ },
+ {
+ "alias": "/.*sda_.*/",
+ "color": "#7EB26D"
+ },
+ {
+ "alias": "/.*sdb_.*/",
+ "color": "#EAB839"
+ },
+ {
+ "alias": "/.*sdc_.*/",
+ "color": "#6ED0E0"
+ },
+ {
+ "alias": "/.*sdd_.*/",
+ "color": "#EF843C"
+ },
+ {
+ "alias": "/.*sde_.*/",
+ "color": "#E24D42"
+ },
+ {
+ "alias": "/.*sda1.*/",
+ "color": "#584477"
+ },
+ {
+ "alias": "/.*sda2_.*/",
+ "color": "#BA43A9"
+ },
+ {
+ "alias": "/.*sda3_.*/",
+ "color": "#F4D598"
+ },
+ {
+ "alias": "/.*sdb1.*/",
+ "color": "#0A50A1"
+ },
+ {
+ "alias": "/.*sdb2.*/",
+ "color": "#BF1B00"
+ },
+ {
+ "alias": "/.*sdb3.*/",
+ "color": "#E0752D"
+ },
+ {
+ "alias": "/.*sdc1.*/",
+ "color": "#962D82"
+ },
+ {
+ "alias": "/.*sdc2.*/",
+ "color": "#614D93"
+ },
+ {
+ "alias": "/.*sdc3.*/",
+ "color": "#9AC48A"
+ },
+ {
+ "alias": "/.*sdd1.*/",
+ "color": "#65C5DB"
+ },
+ {
+ "alias": "/.*sdd2.*/",
+ "color": "#F9934E"
+ },
+ {
+ "alias": "/.*sdd3.*/",
+ "color": "#EA6460"
+ },
+ {
+ "alias": "/.*sde1.*/",
+ "color": "#E0F9D7"
+ },
+ {
+ "alias": "/.*sdd2.*/",
+ "color": "#FCEACA"
+ },
+ {
+ "alias": "/.*sde3.*/",
+ "color": "#F9E2D2"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_disk_read_bytes_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 4,
+ "legendFormat": "{{device}} - Read bytes",
+ "refId": "A",
+ "step": 8
+ },
+ {
+ "expr": "irate(node_disk_written_bytes_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Written bytes",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Disk R/W Data",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": "bytes read (-) / write (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 3,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 39
+ },
+ "hiddenSeries": false,
+ "id": 37,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Read.*/",
+ "transform": "negative-Y"
+ },
+ {
+ "alias": "/.*sda_.*/",
+ "color": "#7EB26D"
+ },
+ {
+ "alias": "/.*sdb_.*/",
+ "color": "#EAB839"
+ },
+ {
+ "alias": "/.*sdc_.*/",
+ "color": "#6ED0E0"
+ },
+ {
+ "alias": "/.*sdd_.*/",
+ "color": "#EF843C"
+ },
+ {
+ "alias": "/.*sde_.*/",
+ "color": "#E24D42"
+ },
+ {
+ "alias": "/.*sda1.*/",
+ "color": "#584477"
+ },
+ {
+ "alias": "/.*sda2_.*/",
+ "color": "#BA43A9"
+ },
+ {
+ "alias": "/.*sda3_.*/",
+ "color": "#F4D598"
+ },
+ {
+ "alias": "/.*sdb1.*/",
+ "color": "#0A50A1"
+ },
+ {
+ "alias": "/.*sdb2.*/",
+ "color": "#BF1B00"
+ },
+ {
+ "alias": "/.*sdb3.*/",
+ "color": "#E0752D"
+ },
+ {
+ "alias": "/.*sdc1.*/",
+ "color": "#962D82"
+ },
+ {
+ "alias": "/.*sdc2.*/",
+ "color": "#614D93"
+ },
+ {
+ "alias": "/.*sdc3.*/",
+ "color": "#9AC48A"
+ },
+ {
+ "alias": "/.*sdd1.*/",
+ "color": "#65C5DB"
+ },
+ {
+ "alias": "/.*sdd2.*/",
+ "color": "#F9934E"
+ },
+ {
+ "alias": "/.*sdd3.*/",
+ "color": "#EA6460"
+ },
+ {
+ "alias": "/.*sde1.*/",
+ "color": "#E0F9D7"
+ },
+ {
+ "alias": "/.*sdd2.*/",
+ "color": "#FCEACA"
+ },
+ {
+ "alias": "/.*sde3.*/",
+ "color": "#F9E2D2"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_disk_read_time_seconds_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "hide": false,
+ "intervalFactor": 4,
+ "legendFormat": "{{device}} - Read time",
+ "refId": "A",
+ "step": 8
+ },
+ {
+ "expr": "irate(node_disk_write_time_seconds_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Write time",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Disk R/W Time",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": "time. read (-) / write (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 39
+ },
+ "hiddenSeries": false,
+ "id": 35,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*sda_.*/",
+ "color": "#7EB26D"
+ },
+ {
+ "alias": "/.*sdb_.*/",
+ "color": "#EAB839"
+ },
+ {
+ "alias": "/.*sdc_.*/",
+ "color": "#6ED0E0"
+ },
+ {
+ "alias": "/.*sdd_.*/",
+ "color": "#EF843C"
+ },
+ {
+ "alias": "/.*sde_.*/",
+ "color": "#E24D42"
+ },
+ {
+ "alias": "/.*sda1.*/",
+ "color": "#584477"
+ },
+ {
+ "alias": "/.*sda2_.*/",
+ "color": "#BA43A9"
+ },
+ {
+ "alias": "/.*sda3_.*/",
+ "color": "#F4D598"
+ },
+ {
+ "alias": "/.*sdb1.*/",
+ "color": "#0A50A1"
+ },
+ {
+ "alias": "/.*sdb2.*/",
+ "color": "#BF1B00"
+ },
+ {
+ "alias": "/.*sdb3.*/",
+ "color": "#E0752D"
+ },
+ {
+ "alias": "/.*sdc1.*/",
+ "color": "#962D82"
+ },
+ {
+ "alias": "/.*sdc2.*/",
+ "color": "#614D93"
+ },
+ {
+ "alias": "/.*sdc3.*/",
+ "color": "#9AC48A"
+ },
+ {
+ "alias": "/.*sdd1.*/",
+ "color": "#65C5DB"
+ },
+ {
+ "alias": "/.*sdd2.*/",
+ "color": "#F9934E"
+ },
+ {
+ "alias": "/.*sdd3.*/",
+ "color": "#EA6460"
+ },
+ {
+ "alias": "/.*sde1.*/",
+ "color": "#E0F9D7"
+ },
+ {
+ "alias": "/.*sdd2.*/",
+ "color": "#FCEACA"
+ },
+ {
+ "alias": "/.*sde3.*/",
+ "color": "#F9E2D2"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_disk_io_time_weighted_seconds_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "intervalFactor": 4,
+ "legendFormat": "{{device}} - IO time weighted",
+ "refId": "A",
+ "step": 8
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Disk IOs Weighted",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": "time",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 49
+ },
+ "hiddenSeries": false,
+ "id": 133,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Read.*/",
+ "transform": "negative-Y"
+ },
+ {
+ "alias": "/.*sda_.*/",
+ "color": "#7EB26D"
+ },
+ {
+ "alias": "/.*sdb_.*/",
+ "color": "#EAB839"
+ },
+ {
+ "alias": "/.*sdc_.*/",
+ "color": "#6ED0E0"
+ },
+ {
+ "alias": "/.*sdd_.*/",
+ "color": "#EF843C"
+ },
+ {
+ "alias": "/.*sde_.*/",
+ "color": "#E24D42"
+ },
+ {
+ "alias": "/.*sda1.*/",
+ "color": "#584477"
+ },
+ {
+ "alias": "/.*sda2_.*/",
+ "color": "#BA43A9"
+ },
+ {
+ "alias": "/.*sda3_.*/",
+ "color": "#F4D598"
+ },
+ {
+ "alias": "/.*sdb1.*/",
+ "color": "#0A50A1"
+ },
+ {
+ "alias": "/.*sdb2.*/",
+ "color": "#BF1B00"
+ },
+ {
+ "alias": "/.*sdb3.*/",
+ "color": "#E0752D"
+ },
+ {
+ "alias": "/.*sdc1.*/",
+ "color": "#962D82"
+ },
+ {
+ "alias": "/.*sdc2.*/",
+ "color": "#614D93"
+ },
+ {
+ "alias": "/.*sdc3.*/",
+ "color": "#9AC48A"
+ },
+ {
+ "alias": "/.*sdd1.*/",
+ "color": "#65C5DB"
+ },
+ {
+ "alias": "/.*sdd2.*/",
+ "color": "#F9934E"
+ },
+ {
+ "alias": "/.*sdd3.*/",
+ "color": "#EA6460"
+ },
+ {
+ "alias": "/.*sde1.*/",
+ "color": "#E0F9D7"
+ },
+ {
+ "alias": "/.*sdd2.*/",
+ "color": "#FCEACA"
+ },
+ {
+ "alias": "/.*sde3.*/",
+ "color": "#F9E2D2"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_disk_reads_merged_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Read merged",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_disk_writes_merged_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Write merged",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Disk R/W Merged",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "iops",
+ "label": "I/Os",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 3,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 49
+ },
+ "hiddenSeries": false,
+ "id": 36,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*sda_.*/",
+ "color": "#7EB26D"
+ },
+ {
+ "alias": "/.*sdb_.*/",
+ "color": "#EAB839"
+ },
+ {
+ "alias": "/.*sdc_.*/",
+ "color": "#6ED0E0"
+ },
+ {
+ "alias": "/.*sdd_.*/",
+ "color": "#EF843C"
+ },
+ {
+ "alias": "/.*sde_.*/",
+ "color": "#E24D42"
+ },
+ {
+ "alias": "/.*sda1.*/",
+ "color": "#584477"
+ },
+ {
+ "alias": "/.*sda2_.*/",
+ "color": "#BA43A9"
+ },
+ {
+ "alias": "/.*sda3_.*/",
+ "color": "#F4D598"
+ },
+ {
+ "alias": "/.*sdb1.*/",
+ "color": "#0A50A1"
+ },
+ {
+ "alias": "/.*sdb2.*/",
+ "color": "#BF1B00"
+ },
+ {
+ "alias": "/.*sdb3.*/",
+ "color": "#E0752D"
+ },
+ {
+ "alias": "/.*sdc1.*/",
+ "color": "#962D82"
+ },
+ {
+ "alias": "/.*sdc2.*/",
+ "color": "#614D93"
+ },
+ {
+ "alias": "/.*sdc3.*/",
+ "color": "#9AC48A"
+ },
+ {
+ "alias": "/.*sdd1.*/",
+ "color": "#65C5DB"
+ },
+ {
+ "alias": "/.*sdd2.*/",
+ "color": "#F9934E"
+ },
+ {
+ "alias": "/.*sdd3.*/",
+ "color": "#EA6460"
+ },
+ {
+ "alias": "/.*sde1.*/",
+ "color": "#E0F9D7"
+ },
+ {
+ "alias": "/.*sdd2.*/",
+ "color": "#FCEACA"
+ },
+ {
+ "alias": "/.*sde3.*/",
+ "color": "#F9E2D2"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_disk_io_time_seconds_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "intervalFactor": 4,
+ "legendFormat": "{{device}} - IO time",
+ "refId": "A",
+ "step": 8
+ },
+ {
+ "expr": "irate(node_disk_discard_time_seconds_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "interval": "",
+ "intervalFactor": 4,
+ "legendFormat": "{{device}} - discard time",
+ "refId": "B",
+ "step": 8
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Time Spent Doing I/Os",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": "time",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 59
+ },
+ "hiddenSeries": false,
+ "id": 34,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*sda_.*/",
+ "color": "#7EB26D"
+ },
+ {
+ "alias": "/.*sdb_.*/",
+ "color": "#EAB839"
+ },
+ {
+ "alias": "/.*sdc_.*/",
+ "color": "#6ED0E0"
+ },
+ {
+ "alias": "/.*sdd_.*/",
+ "color": "#EF843C"
+ },
+ {
+ "alias": "/.*sde_.*/",
+ "color": "#E24D42"
+ },
+ {
+ "alias": "/.*sda1.*/",
+ "color": "#584477"
+ },
+ {
+ "alias": "/.*sda2_.*/",
+ "color": "#BA43A9"
+ },
+ {
+ "alias": "/.*sda3_.*/",
+ "color": "#F4D598"
+ },
+ {
+ "alias": "/.*sdb1.*/",
+ "color": "#0A50A1"
+ },
+ {
+ "alias": "/.*sdb2.*/",
+ "color": "#BF1B00"
+ },
+ {
+ "alias": "/.*sdb3.*/",
+ "color": "#E0752D"
+ },
+ {
+ "alias": "/.*sdc1.*/",
+ "color": "#962D82"
+ },
+ {
+ "alias": "/.*sdc2.*/",
+ "color": "#614D93"
+ },
+ {
+ "alias": "/.*sdc3.*/",
+ "color": "#9AC48A"
+ },
+ {
+ "alias": "/.*sdd1.*/",
+ "color": "#65C5DB"
+ },
+ {
+ "alias": "/.*sdd2.*/",
+ "color": "#F9934E"
+ },
+ {
+ "alias": "/.*sdd3.*/",
+ "color": "#EA6460"
+ },
+ {
+ "alias": "/.*sde1.*/",
+ "color": "#E0F9D7"
+ },
+ {
+ "alias": "/.*sdd2.*/",
+ "color": "#FCEACA"
+ },
+ {
+ "alias": "/.*sde3.*/",
+ "color": "#F9E2D2"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_disk_io_now{instance=\"$node\",job=\"$job\"}[5m])",
+ "intervalFactor": 4,
+ "legendFormat": "{{device}} - IO now",
+ "refId": "A",
+ "step": 8
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Disk IOs Current in Progress",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "iops",
+ "label": "I/Os",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 59
+ },
+ "hiddenSeries": false,
+ "id": 301,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null as zero",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:2034",
+ "alias": "/.*sda_.*/",
+ "color": "#7EB26D"
+ },
+ {
+ "$$hashKey": "object:2035",
+ "alias": "/.*sdb_.*/",
+ "color": "#EAB839"
+ },
+ {
+ "$$hashKey": "object:2036",
+ "alias": "/.*sdc_.*/",
+ "color": "#6ED0E0"
+ },
+ {
+ "$$hashKey": "object:2037",
+ "alias": "/.*sdd_.*/",
+ "color": "#EF843C"
+ },
+ {
+ "$$hashKey": "object:2038",
+ "alias": "/.*sde_.*/",
+ "color": "#E24D42"
+ },
+ {
+ "$$hashKey": "object:2039",
+ "alias": "/.*sda1.*/",
+ "color": "#584477"
+ },
+ {
+ "$$hashKey": "object:2040",
+ "alias": "/.*sda2_.*/",
+ "color": "#BA43A9"
+ },
+ {
+ "$$hashKey": "object:2041",
+ "alias": "/.*sda3_.*/",
+ "color": "#F4D598"
+ },
+ {
+ "$$hashKey": "object:2042",
+ "alias": "/.*sdb1.*/",
+ "color": "#0A50A1"
+ },
+ {
+ "$$hashKey": "object:2043",
+ "alias": "/.*sdb2.*/",
+ "color": "#BF1B00"
+ },
+ {
+ "$$hashKey": "object:2044",
+ "alias": "/.*sdb3.*/",
+ "color": "#E0752D"
+ },
+ {
+ "$$hashKey": "object:2045",
+ "alias": "/.*sdc1.*/",
+ "color": "#962D82"
+ },
+ {
+ "$$hashKey": "object:2046",
+ "alias": "/.*sdc2.*/",
+ "color": "#614D93"
+ },
+ {
+ "$$hashKey": "object:2047",
+ "alias": "/.*sdc3.*/",
+ "color": "#9AC48A"
+ },
+ {
+ "$$hashKey": "object:2048",
+ "alias": "/.*sdd1.*/",
+ "color": "#65C5DB"
+ },
+ {
+ "$$hashKey": "object:2049",
+ "alias": "/.*sdd2.*/",
+ "color": "#F9934E"
+ },
+ {
+ "$$hashKey": "object:2050",
+ "alias": "/.*sdd3.*/",
+ "color": "#EA6460"
+ },
+ {
+ "$$hashKey": "object:2051",
+ "alias": "/.*sde1.*/",
+ "color": "#E0F9D7"
+ },
+ {
+ "$$hashKey": "object:2052",
+ "alias": "/.*sdd2.*/",
+ "color": "#FCEACA"
+ },
+ {
+ "$$hashKey": "object:2053",
+ "alias": "/.*sde3.*/",
+ "color": "#F9E2D2"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_disk_discards_completed_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "interval": "",
+ "intervalFactor": 4,
+ "legendFormat": "{{device}} - Discards completed",
+ "refId": "A",
+ "step": 8
+ },
+ {
+ "expr": "irate(node_disk_discards_merged_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Discards merged",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Disk IOps Discards completed / merged",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:2186",
+ "format": "iops",
+ "label": "IOs",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:2187",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "repeat": null,
+ "title": "Storage Disk",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 29
+ },
+ "id": 271,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": 3,
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 78
+ },
+ "hiddenSeries": false,
+ "id": 43,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{mountpoint}} - Available",
+ "metric": "",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_filesystem_free_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{mountpoint}} - Free",
+ "refId": "B",
+ "step": 2
+ },
+ {
+ "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}",
+ "format": "time_series",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{mountpoint}} - Size",
+ "refId": "C",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Filesystem space available",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:3826",
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:3827",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 78
+ },
+ "hiddenSeries": false,
+ "id": 41,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_filesystem_files_free{instance=\"$node\",job=\"$job\",device!~'rootfs'}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{mountpoint}} - Free file nodes",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "File Nodes Free",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:3894",
+ "format": "short",
+ "label": "file nodes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:3895",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 88
+ },
+ "hiddenSeries": false,
+ "id": 28,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_filefd_maximum{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 4,
+ "legendFormat": "Max open files",
+ "refId": "A",
+ "step": 8
+ },
+ {
+ "expr": "node_filefd_allocated{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Open files",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "File Descriptor",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "files",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 88
+ },
+ "hiddenSeries": false,
+ "id": 219,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_filesystem_files{instance=\"$node\",job=\"$job\",device!~'rootfs'}",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{mountpoint}} - File nodes total",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "File Nodes Size",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "file Nodes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {
+ "/ ReadOnly": "#890F02"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 98
+ },
+ "hiddenSeries": false,
+ "id": 44,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": true,
+ "hideZero": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 6,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_filesystem_readonly{instance=\"$node\",job=\"$job\",device!~'rootfs'}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{mountpoint}} - ReadOnly",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_filesystem_device_error{instance=\"$node\",job=\"$job\",device!~'rootfs',fstype!~'tmpfs'}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{mountpoint}} - Device error",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Filesystem in ReadOnly / Error",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:3670",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": "1",
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:3671",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "repeat": null,
+ "title": "Storage Filesystem",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 30
+ },
+ "id": 272,
+ "panels": [
+ {
+ "aliasColors": {
+ "receive_packets_eth0": "#7EB26D",
+ "receive_packets_lo": "#E24D42",
+ "transmit_packets_eth0": "#7EB26D",
+ "transmit_packets_lo": "#E24D42"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 30
+ },
+ "hiddenSeries": false,
+ "id": 60,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Trans.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_network_receive_packets_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Receive",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_network_transmit_packets_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Transmit",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Network Traffic by Packets",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "pps",
+ "label": "packets out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 30
+ },
+ "hiddenSeries": false,
+ "id": 142,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Trans.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_network_receive_errs_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Receive errors",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_network_transmit_errs_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Rransmit errors",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Network Traffic Errors",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "pps",
+ "label": "packets out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 40
+ },
+ "hiddenSeries": false,
+ "id": 143,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Trans.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_network_receive_drop_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Receive drop",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_network_transmit_drop_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Transmit drop",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Network Traffic Drop",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "pps",
+ "label": "packets out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 40
+ },
+ "hiddenSeries": false,
+ "id": 141,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Trans.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_network_receive_compressed_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Receive compressed",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_network_transmit_compressed_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Transmit compressed",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Network Traffic Compressed",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "pps",
+ "label": "packets out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 50
+ },
+ "hiddenSeries": false,
+ "id": 146,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Trans.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_network_receive_multicast_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Receive multicast",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Network Traffic Multicast",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "pps",
+ "label": "packets out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 50
+ },
+ "hiddenSeries": false,
+ "id": 144,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Trans.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_network_receive_fifo_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Receive fifo",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_network_transmit_fifo_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Transmit fifo",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Network Traffic Fifo",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "pps",
+ "label": "packets out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 60
+ },
+ "hiddenSeries": false,
+ "id": 145,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:576",
+ "alias": "/.*Trans.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_network_receive_frame_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Receive frame",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Network Traffic Frame",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:589",
+ "format": "pps",
+ "label": "packets out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:590",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 60
+ },
+ "hiddenSeries": false,
+ "id": 231,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_network_transmit_carrier_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Statistic transmit_carrier",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Network Traffic Carrier",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 70
+ },
+ "hiddenSeries": false,
+ "id": 232,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Trans.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_network_transmit_colls_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}} - Transmit colls",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Network Traffic Colls",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 70
+ },
+ "hiddenSeries": false,
+ "id": 61,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:663",
+ "alias": "NF conntrack limit",
+ "color": "#890F02",
+ "fill": 0
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_nf_conntrack_entries{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "NF conntrack entries",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_nf_conntrack_entries_limit{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "NF conntrack limit",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "NF Contrack",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:678",
+ "format": "short",
+ "label": "entries",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:679",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 80
+ },
+ "hiddenSeries": false,
+ "id": 230,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_arp_entries{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ device }} - ARP entries",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "ARP Entries",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Entries",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 80
+ },
+ "hiddenSeries": false,
+ "id": 288,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 1,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_network_mtu_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ device }} - Bytes",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "MTU",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": 0,
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 90
+ },
+ "hiddenSeries": false,
+ "id": 280,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 1,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_network_speed_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ device }} - Speed",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Speed",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": 0,
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 90
+ },
+ "hiddenSeries": false,
+ "id": 289,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 1,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_network_transmit_queue_length{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{ device }} - Interface transmit queue length",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Queue Length",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": 0,
+ "format": "none",
+ "label": "packets",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 100
+ },
+ "hiddenSeries": false,
+ "id": 290,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:232",
+ "alias": "/.*Dropped.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_softnet_processed_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "CPU {{cpu}} - Processed",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_softnet_dropped_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "CPU {{cpu}} - Dropped",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Softnet Packets",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:207",
+ "format": "short",
+ "label": "packetes drop (-) / process (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:208",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 100
+ },
+ "hiddenSeries": false,
+ "id": 310,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_softnet_times_squeezed_total{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "CPU {{cpu}} - Squeezed",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Softnet Out of Quota",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:207",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:208",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 110
+ },
+ "hiddenSeries": false,
+ "id": 309,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_network_up{operstate=\"up\",instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "{{interface}} - Operational state UP",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_network_carrier{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "instant": false,
+ "legendFormat": "{{device}} - Physical link state",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Network Operational Status",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "repeat": null,
+ "title": "Network Traffic",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 31
+ },
+ "id": 273,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 13
+ },
+ "hiddenSeries": false,
+ "id": 63,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_sockstat_TCP_alloc{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "TCP_alloc - Allocated sockets",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_sockstat_TCP_inuse{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "TCP_inuse - Tcp sockets currently in use",
+ "refId": "B",
+ "step": 240
+ },
+ {
+ "expr": "node_sockstat_TCP_mem{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": true,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "TCP_mem - Used memory for tcp",
+ "refId": "C",
+ "step": 240
+ },
+ {
+ "expr": "node_sockstat_TCP_orphan{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "TCP_orphan - Orphan sockets",
+ "refId": "D",
+ "step": 240
+ },
+ {
+ "expr": "node_sockstat_TCP_tw{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "TCP_tw - Sockets wating close",
+ "refId": "E",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Sockstat TCP",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 13
+ },
+ "hiddenSeries": false,
+ "id": 124,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_sockstat_UDPLITE_inuse{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "UDPLITE_inuse - Udplite sockets currently in use",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_sockstat_UDP_inuse{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "UDP_inuse - Udp sockets currently in use",
+ "refId": "B",
+ "step": 240
+ },
+ {
+ "expr": "node_sockstat_UDP_mem{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "UDP_mem - Used memory for udp",
+ "refId": "C",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Sockstat UDP",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 23
+ },
+ "hiddenSeries": false,
+ "id": 126,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_sockstat_sockets_used{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Sockets_used - Sockets currently in use",
+ "refId": "A",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Sockstat Used",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "sockets",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 23
+ },
+ "hiddenSeries": false,
+ "id": 220,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_sockstat_TCP_mem_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "mem_bytes - TCP sockets in that state",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_sockstat_UDP_mem_bytes{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "mem_bytes - UDP sockets in that state",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Sockstat Memory Size",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "bytes",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 33
+ },
+ "hiddenSeries": false,
+ "id": 125,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_sockstat_FRAG_inuse{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "FRAG_inuse - Frag sockets currently in use",
+ "refId": "A",
+ "step": 240
+ },
+ {
+ "expr": "node_sockstat_FRAG_memory{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "FRAG_memory - Used memory for frag",
+ "refId": "B",
+ "step": 240
+ },
+ {
+ "expr": "node_sockstat_RAW_inuse{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "RAW_inuse - Raw sockets currently in use",
+ "refId": "C",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Sockstat FRAG / RAW",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:1572",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:1573",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "repeat": null,
+ "title": "Network Sockstat",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 32
+ },
+ "id": 274,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 32
+ },
+ "height": "",
+ "hiddenSeries": false,
+ "id": 221,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 12,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:1876",
+ "alias": "/.*Out.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_netstat_IpExt_InOctets{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "InOctets - Received octets",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_netstat_IpExt_OutOctets{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "OutOctets - Sent octets",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Netstat IP In / Out Octets",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:1889",
+ "format": "short",
+ "label": "octects out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:1890",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 32
+ },
+ "height": "",
+ "hiddenSeries": false,
+ "id": 81,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sideWidth": 300,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_netstat_Ip_Forwarding{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Forwarding - IP forwarding",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Netstat IP Forwarding",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:1957",
+ "format": "short",
+ "label": "datagrams",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:1958",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 42
+ },
+ "height": "",
+ "hiddenSeries": false,
+ "id": 115,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 12,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Out.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_netstat_Icmp_InMsgs{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "InMsgs - Messages which the entity received. Note that this counter includes all those counted by icmpInErrors",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_netstat_Icmp_OutMsgs{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "OutMsgs - Messages which this entity attempted to send. Note that this counter includes all those counted by icmpOutErrors",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "ICMP In / Out",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "messages out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 42
+ },
+ "height": "",
+ "hiddenSeries": false,
+ "id": 50,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 12,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Out.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_netstat_Icmp_InErrors{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "InErrors - Messages which the entity received but determined as having ICMP-specific errors (bad ICMP checksums, bad length, etc.)",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "ICMP Errors",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "messages out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 52
+ },
+ "height": "",
+ "hiddenSeries": false,
+ "id": 55,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 12,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Out.*/",
+ "transform": "negative-Y"
+ },
+ {
+ "alias": "/.*Snd.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_netstat_Udp_InDatagrams{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "InDatagrams - Datagrams received",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_netstat_Udp_OutDatagrams{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "OutDatagrams - Datagrams sent",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "UDP In / Out",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "datagrams out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 52
+ },
+ "height": "",
+ "hiddenSeries": false,
+ "id": 109,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 12,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_netstat_Udp_InErrors{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "InErrors - UDP Datagrams that could not be delivered to an application",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_netstat_Udp_NoPorts{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "NoPorts - UDP Datagrams received on a port with no listener",
+ "refId": "B",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_netstat_UdpLite_InErrors{instance=\"$node\",job=\"$job\"}[5m])",
+ "interval": "",
+ "legendFormat": "InErrors Lite - UDPLite Datagrams that could not be delivered to an application",
+ "refId": "C"
+ },
+ {
+ "expr": "irate(node_netstat_Udp_RcvbufErrors{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "RcvbufErrors - UDP buffer errors received",
+ "refId": "D",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_netstat_Udp_SndbufErrors{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "SndbufErrors - UDP buffer errors send",
+ "refId": "E",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "UDP Errors",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:4232",
+ "format": "short",
+ "label": "datagrams",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:4233",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 62
+ },
+ "height": "",
+ "hiddenSeries": false,
+ "id": 299,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 12,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Out.*/",
+ "transform": "negative-Y"
+ },
+ {
+ "alias": "/.*Snd.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_netstat_Tcp_InSegs{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "instant": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "InSegs - Segments received, including those received in error. This count includes segments received on currently established connections",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_netstat_Tcp_OutSegs{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "OutSegs - Segments sent, including those on current connections but excluding those containing only retransmitted octets",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "TCP In / Out",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "datagrams out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 62
+ },
+ "height": "",
+ "hiddenSeries": false,
+ "id": 104,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 12,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_netstat_TcpExt_ListenOverflows{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "ListenOverflows - Times the listen queue of a socket overflowed",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_netstat_TcpExt_ListenDrops{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "ListenDrops - SYNs to LISTEN sockets ignored",
+ "refId": "B",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_netstat_TcpExt_TCPSynRetrans{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "TCPSynRetrans - SYN-SYN/ACK retransmits to break down retransmissions in SYN, fast/timeout retransmits",
+ "refId": "C",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_netstat_Tcp_RetransSegs{instance=\"$node\",job=\"$job\"}[5m])",
+ "interval": "",
+ "legendFormat": "RetransSegs - Segments retransmitted - that is, the number of TCP segments transmitted containing one or more previously transmitted octets",
+ "refId": "D"
+ },
+ {
+ "expr": "irate(node_netstat_Tcp_InErrs{instance=\"$node\",job=\"$job\"}[5m])",
+ "interval": "",
+ "legendFormat": "InErrs - Segments received in error (e.g., bad TCP checksums)",
+ "refId": "E"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "TCP Errors",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 72
+ },
+ "height": "",
+ "hiddenSeries": false,
+ "id": 85,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 12,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:454",
+ "alias": "/.*MaxConn *./",
+ "color": "#890F02",
+ "fill": 0
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_netstat_Tcp_CurrEstab{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "CurrEstab - TCP connections for which the current state is either ESTABLISHED or CLOSE- WAIT",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_netstat_Tcp_MaxConn{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "MaxConn - Limit on the total number of TCP connections the entity can support (Dinamic is \"-1\")",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "TCP Connections",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:469",
+ "format": "short",
+ "label": "connections",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "$$hashKey": "object:470",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 72
+ },
+ "height": "",
+ "hiddenSeries": false,
+ "id": 91,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 12,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Sent.*/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_netstat_TcpExt_SyncookiesFailed{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "SyncookiesFailed - Invalid SYN cookies received",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_netstat_TcpExt_SyncookiesRecv{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "SyncookiesRecv - SYN cookies received",
+ "refId": "B",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_netstat_TcpExt_SyncookiesSent{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "SyncookiesSent - SYN cookies sent",
+ "refId": "C",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "TCP SynCookie",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "counter out (-) / in (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 82
+ },
+ "height": "",
+ "hiddenSeries": false,
+ "id": 82,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "hideZero": false,
+ "max": true,
+ "min": true,
+ "rightSide": false,
+ "show": true,
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "maxPerRow": 12,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_netstat_Tcp_ActiveOpens{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "ActiveOpens - TCP connections that have made a direct transition to the SYN-SENT state from the CLOSED state",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "irate(node_netstat_Tcp_PassiveOpens{instance=\"$node\",job=\"$job\"}[5m])",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "PassiveOpens - TCP connections that have made a direct transition to the SYN-RCVD state from the LISTEN state",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "TCP Direct Transition",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "connections",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "repeat": null,
+ "title": "Network Netstat",
+ "type": "row"
+ },
+ {
+ "collapsed": true,
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 33
+ },
+ "id": 279,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 0,
+ "y": 54
+ },
+ "hiddenSeries": false,
+ "id": 40,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_scrape_collector_duration_seconds{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{collector}} - Scrape duration",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Node Exporter Scrape Time",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": "seconds",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "description": "",
+ "fill": 2,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 10,
+ "w": 12,
+ "x": 12,
+ "y": 54
+ },
+ "hiddenSeries": false,
+ "id": 157,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "$$hashKey": "object:1969",
+ "alias": "/.*error.*/",
+ "color": "#F2495C",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_scrape_collector_success{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{collector}} - Scrape success",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "node_textfile_scrape_error{instance=\"$node\",job=\"$job\"}",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{collector}} - Scrape textfile error (1 = true)",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Node Exporter Scrape",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:1484",
+ "format": "short",
+ "label": "counter",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:1485",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "repeat": null,
+ "title": "Node Exporter",
+ "type": "row"
+ }
+ ],
+ "refresh": "1m",
+ "schemaVersion": 22,
+ "style": "dark",
+ "tags": [
+ "linux"
+ ],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "selected": false,
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": "datasource",
+ "multi": false,
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "definition": "",
+ "hide": 0,
+ "includeAll": false,
+ "index": -1,
+ "label": "Job",
+ "multi": false,
+ "name": "job",
+ "options": [],
+ "query": "label_values(node_uname_info, job)",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "definition": "label_values(node_uname_info{job=\"$job\"}, instance)",
+ "hide": 0,
+ "includeAll": false,
+ "index": -1,
+ "label": "Host:",
+ "multi": false,
+ "name": "node",
+ "options": [],
+ "query": "label_values(node_uname_info{job=\"$job\"}, instance)",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {
+ "selected": false,
+ "text": "[a-z]+|nvme[0-9]+n[0-9]+",
+ "value": "[a-z]+|nvme[0-9]+n[0-9]+"
+ },
+ "hide": 2,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "diskdevices",
+ "options": [
+ {
+ "selected": true,
+ "text": "[a-z]+|nvme[0-9]+n[0-9]+",
+ "value": "[a-z]+|nvme[0-9]+n[0-9]+"
+ }
+ ],
+ "query": "[a-z]+|nvme[0-9]+n[0-9]+",
+ "skipUrlSync": false,
+ "type": "custom"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-4h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "Node Exporter",
+ "version": 1
+} \ No newline at end of file
diff --git a/terraform-ci-infra/1n_nmd/grafana/conf/nomad.json b/terraform-ci-infra/1n_nmd/grafana/conf/nomad.json
new file mode 100644
index 0000000..40ffedd
--- /dev/null
+++ b/terraform-ci-infra/1n_nmd/grafana/conf/nomad.json
@@ -0,0 +1,869 @@
+{
+ "__inputs": [
+ {
+ "name": "DS_PROMETHEUS",
+ "label": "Prometheus",
+ "description": "",
+ "type": "datasource",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus"
+ }
+ ],
+ "__requires": [
+ {
+ "type": "grafana",
+ "id": "grafana",
+ "name": "Grafana",
+ "version": "5.3.2"
+ },
+ {
+ "type": "panel",
+ "id": "graph",
+ "name": "Graph",
+ "version": "5.0.0"
+ },
+ {
+ "type": "datasource",
+ "id": "prometheus",
+ "name": "Prometheus",
+ "version": "5.0.0"
+ },
+ {
+ "type": "panel",
+ "id": "singlestat",
+ "name": "Singlestat",
+ "version": "5.0.0"
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "description": "Nomad Jobs metrics",
+ "editable": true,
+ "gnetId": 12787,
+ "graphTooltip": 0,
+ "id": null,
+ "iteration": 1596708119930,
+ "links": [],
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "format": "dtdurations",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 0,
+ "y": 0
+ },
+ "id": 16,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "alias": "",
+ "expr": "max(nomad_client_uptime{instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "rawSql": "SELECT\n UNIX_TIMESTAMP(<time_column>) as time_sec,\n <value column> as value,\n <series name column> as metric\nFROM <table name>\nWHERE $__timeFilter(time_column)\nORDER BY <time_column> ASC\n",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Uptime",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorPrefix": false,
+ "colorValue": false,
+ "colors": [
+ "#7eb26d",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 4,
+ "y": 0
+ },
+ "id": 17,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "alias": "",
+ "expr": "count(sum(nomad_client_allocs_memory_cache) by (exported_job))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "rawSql": "SELECT\n UNIX_TIMESTAMP(<time_column>) as time_sec,\n <value column> as value,\n <series name column> as metric\nFROM <table name>\nWHERE $__timeFilter(time_column)\nORDER BY <time_column> ASC\n",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Jobs",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorPrefix": false,
+ "colorValue": false,
+ "colors": [
+ "#7eb26d",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 8,
+ "y": 0
+ },
+ "id": 12,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "alias": "",
+ "expr": "sum(nomad_client_allocations_running{datacenter=\"$datacenter\",instance=~\"$instance\"})",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "rawSql": "SELECT\n UNIX_TIMESTAMP(<time_column>) as time_sec,\n <value column> as value,\n <series name column> as metric\nFROM <table name>\nWHERE $__timeFilter(time_column)\nORDER BY <time_column> ASC\n",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "Allocs",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 4,
+ "w": 12,
+ "x": 12,
+ "y": 0
+ },
+ "id": 14,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "alias": "",
+ "expr": "sum(nomad_client_allocations_blocked{datacenter=\"$datacenter\",instance=~\"$instance\"})",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 1,
+ "legendFormat": "Blocked",
+ "rawSql": "SELECT\n UNIX_TIMESTAMP(<time_column>) as time_sec,\n <value column> as value,\n <series name column> as metric\nFROM <table name>\nWHERE $__timeFilter(time_column)\nORDER BY <time_column> ASC\n",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(nomad_client_allocations_pending{datacenter=\"$datacenter\",instance=~\"$instance\"})",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 1,
+ "legendFormat": "Pending",
+ "refId": "B"
+ },
+ {
+ "expr": "sum(nomad_client_allocations_restart{datacenter=\"$datacenter\",instance=~\"$instance\"})",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 1,
+ "legendFormat": "Restart ",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Block/Peding/Restart",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 6,
+ "w": 12,
+ "x": 0,
+ "y": 13
+ },
+ "id": 2,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": "host",
+ "repeatDirection": "v",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(nomad_client_allocs_cpu_total_percent[5m:10s]) > 1",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "{{task}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU Usage Percent",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": 3,
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 6,
+ "w": 12,
+ "x": 12,
+ "y": 13
+ },
+ "id": 3,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": "host",
+ "repeatDirection": "v",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "avg(nomad_client_allocs_cpu_total_ticks{instance=~\"$instance\"}) by(exported_job, task)",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "{{task}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU Total Ticks",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": 3,
+ "format": "timeticks",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 6,
+ "w": 12,
+ "x": 0,
+ "y": 19
+ },
+ "id": 6,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": "host",
+ "repeatDirection": "v",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "avg(nomad_client_allocs_memory_rss{instance=~\"$instance\"}) by(exported_job, task)",
+ "format": "time_series",
+ "interval": "",
+ "intervalFactor": 1,
+ "legendFormat": "{{task}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "RSS",
+ "tooltip": {
+ "shared": false,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": 3,
+ "format": "decbytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,<