From c318223fcd266c0ee2982e803c44e193c2023054 Mon Sep 17 00:00:00 2001 From: pmikus Date: Sun, 7 Mar 2021 08:57:13 +0000 Subject: Infra: Switch csit-shim to fdiotools + use /u/fdiotools + use ubuntu 20.04 Signed-off-by: pmikus Change-Id: I091e63a0d9e50de203b1527c7500b3864a616af6 --- terraform-ci-infra/1n_nmd/.gitignore | 1 - terraform-ci-infra/1n_nmd/.terraform.lock.hcl | 58 - .../alertmanager/conf/nomad/alertmanager.hcl | 380 - terraform-ci-infra/1n_nmd/alertmanager/main.tf | 40 - .../1n_nmd/alertmanager/variables.tf | 102 - .../grafana/conf/blackbox_exporter_http.json | 1030 -- .../grafana/conf/blackbox_exporter_icmp.json | 368 - terraform-ci-infra/1n_nmd/grafana/conf/consul.json | 1438 -- .../1n_nmd/grafana/conf/docker_cadvisor.json | 2040 --- .../1n_nmd/grafana/conf/node_exporter.json | 13696 ------------------- terraform-ci-infra/1n_nmd/grafana/conf/nomad.json | 869 -- .../1n_nmd/grafana/conf/nomad/grafana.hcl | 353 - .../1n_nmd/grafana/conf/prometheus.json | 3055 ----- terraform-ci-infra/1n_nmd/grafana/main.tf | 24 - terraform-ci-infra/1n_nmd/grafana/variables.tf | 66 - terraform-ci-infra/1n_nmd/main.tf | 165 - terraform-ci-infra/1n_nmd/minio/conf/nomad/mc.hcl | 73 - .../1n_nmd/minio/conf/nomad/minio.hcl | 223 - terraform-ci-infra/1n_nmd/minio/main.tf | 82 - terraform-ci-infra/1n_nmd/minio/outputs.tf | 4 - terraform-ci-infra/1n_nmd/minio/variables.tf | 170 - terraform-ci-infra/1n_nmd/minio/versions.tf | 13 - .../1n_nmd/nginx/conf/nomad/nginx.hcl | 283 - terraform-ci-infra/1n_nmd/nginx/main.tf | 18 - terraform-ci-infra/1n_nmd/nginx/variables.tf | 25 - terraform-ci-infra/1n_nmd/nginx/versions.tf | 13 - .../1n_nmd/prometheus/conf/nomad/prometheus.hcl | 682 - terraform-ci-infra/1n_nmd/prometheus/main.tf | 37 - terraform-ci-infra/1n_nmd/prometheus/variables.tf | 84 - terraform-ci-infra/1n_nmd/providers.tf | 21 - terraform-ci-infra/1n_nmd/terraform.tfstate | 637 - terraform-ci-infra/1n_nmd/terraform.tfstate.backup | 633 - terraform-ci-infra/1n_nmd/tools/artifacts.py | 138 - .../1n_nmd/tools/artifacts_download.py | 47 - terraform-ci-infra/1n_nmd/variables.tf | 11 - .../1n_nmd/vpp_device/conf/nomad/csit_shim.hcl | 169 - terraform-ci-infra/1n_nmd/vpp_device/main.tf | 21 - terraform-ci-infra/1n_nmd/vpp_device/variables.tf | 43 - terraform-ci-infra/2n_aws_c5n/.gitignore | 5 - terraform-ci-infra/2n_aws_c5n/deploy/main.tf | 390 - terraform-ci-infra/2n_aws_c5n/deploy/variables.tf | 143 - terraform-ci-infra/2n_aws_c5n/deploy/versions.tf | 17 - terraform-ci-infra/2n_aws_c5n/main.tf | 53 - terraform-ci-infra/3n_aws_c5n/.gitignore | 5 - terraform-ci-infra/3n_aws_c5n/deploy/main.tf | 497 - terraform-ci-infra/3n_aws_c5n/deploy/variables.tf | 158 - terraform-ci-infra/3n_aws_c5n/deploy/versions.tf | 17 - terraform-ci-infra/3n_aws_c5n/main.tf | 56 - terraform-ci-infra/3n_azure_fsv2/.gitignore | 4 - terraform-ci-infra/3n_azure_fsv2/main.tf | 593 - terraform-ci-infra/3n_azure_fsv2/nic.tf | 133 - terraform-ci-infra/README.txt | 33 - 52 files changed, 29216 deletions(-) delete mode 100644 terraform-ci-infra/1n_nmd/.gitignore delete mode 100644 terraform-ci-infra/1n_nmd/.terraform.lock.hcl delete mode 100644 terraform-ci-infra/1n_nmd/alertmanager/conf/nomad/alertmanager.hcl delete mode 100644 terraform-ci-infra/1n_nmd/alertmanager/main.tf delete mode 100644 terraform-ci-infra/1n_nmd/alertmanager/variables.tf delete mode 100644 terraform-ci-infra/1n_nmd/grafana/conf/blackbox_exporter_http.json delete mode 100644 terraform-ci-infra/1n_nmd/grafana/conf/blackbox_exporter_icmp.json delete mode 100644 terraform-ci-infra/1n_nmd/grafana/conf/consul.json delete mode 100644 terraform-ci-infra/1n_nmd/grafana/conf/docker_cadvisor.json delete mode 100644 terraform-ci-infra/1n_nmd/grafana/conf/node_exporter.json delete mode 100644 terraform-ci-infra/1n_nmd/grafana/conf/nomad.json delete mode 100644 terraform-ci-infra/1n_nmd/grafana/conf/nomad/grafana.hcl delete mode 100644 terraform-ci-infra/1n_nmd/grafana/conf/prometheus.json delete mode 100644 terraform-ci-infra/1n_nmd/grafana/main.tf delete mode 100644 terraform-ci-infra/1n_nmd/grafana/variables.tf delete mode 100644 terraform-ci-infra/1n_nmd/main.tf delete mode 100644 terraform-ci-infra/1n_nmd/minio/conf/nomad/mc.hcl delete mode 100644 terraform-ci-infra/1n_nmd/minio/conf/nomad/minio.hcl delete mode 100644 terraform-ci-infra/1n_nmd/minio/main.tf delete mode 100644 terraform-ci-infra/1n_nmd/minio/outputs.tf delete mode 100644 terraform-ci-infra/1n_nmd/minio/variables.tf delete mode 100644 terraform-ci-infra/1n_nmd/minio/versions.tf delete mode 100644 terraform-ci-infra/1n_nmd/nginx/conf/nomad/nginx.hcl delete mode 100644 terraform-ci-infra/1n_nmd/nginx/main.tf delete mode 100644 terraform-ci-infra/1n_nmd/nginx/variables.tf delete mode 100644 terraform-ci-infra/1n_nmd/nginx/versions.tf delete mode 100644 terraform-ci-infra/1n_nmd/prometheus/conf/nomad/prometheus.hcl delete mode 100644 terraform-ci-infra/1n_nmd/prometheus/main.tf delete mode 100644 terraform-ci-infra/1n_nmd/prometheus/variables.tf delete mode 100644 terraform-ci-infra/1n_nmd/providers.tf delete mode 100644 terraform-ci-infra/1n_nmd/terraform.tfstate delete mode 100644 terraform-ci-infra/1n_nmd/terraform.tfstate.backup delete mode 100755 terraform-ci-infra/1n_nmd/tools/artifacts.py delete mode 100755 terraform-ci-infra/1n_nmd/tools/artifacts_download.py delete mode 100644 terraform-ci-infra/1n_nmd/variables.tf delete mode 100644 terraform-ci-infra/1n_nmd/vpp_device/conf/nomad/csit_shim.hcl delete mode 100644 terraform-ci-infra/1n_nmd/vpp_device/main.tf delete mode 100644 terraform-ci-infra/1n_nmd/vpp_device/variables.tf delete mode 100644 terraform-ci-infra/2n_aws_c5n/.gitignore delete mode 100644 terraform-ci-infra/2n_aws_c5n/deploy/main.tf delete mode 100644 terraform-ci-infra/2n_aws_c5n/deploy/variables.tf delete mode 100644 terraform-ci-infra/2n_aws_c5n/deploy/versions.tf delete mode 100644 terraform-ci-infra/2n_aws_c5n/main.tf delete mode 100644 terraform-ci-infra/3n_aws_c5n/.gitignore delete mode 100644 terraform-ci-infra/3n_aws_c5n/deploy/main.tf delete mode 100644 terraform-ci-infra/3n_aws_c5n/deploy/variables.tf delete mode 100644 terraform-ci-infra/3n_aws_c5n/deploy/versions.tf delete mode 100644 terraform-ci-infra/3n_aws_c5n/main.tf delete mode 100644 terraform-ci-infra/3n_azure_fsv2/.gitignore delete mode 100644 terraform-ci-infra/3n_azure_fsv2/main.tf delete mode 100644 terraform-ci-infra/3n_azure_fsv2/nic.tf delete mode 100644 terraform-ci-infra/README.txt (limited to 'terraform-ci-infra') diff --git a/terraform-ci-infra/1n_nmd/.gitignore b/terraform-ci-infra/1n_nmd/.gitignore deleted file mode 100644 index 8b1a7baa3e..0000000000 --- a/terraform-ci-infra/1n_nmd/.gitignore +++ /dev/null @@ -1 +0,0 @@ -.terraform/ \ No newline at end of file diff --git a/terraform-ci-infra/1n_nmd/.terraform.lock.hcl b/terraform-ci-infra/1n_nmd/.terraform.lock.hcl deleted file mode 100644 index 3a2e4ef85f..0000000000 --- a/terraform-ci-infra/1n_nmd/.terraform.lock.hcl +++ /dev/null @@ -1,58 +0,0 @@ -# This file is maintained automatically by "terraform init". -# Manual edits may be lost in future updates. - -provider "registry.terraform.io/hashicorp/nomad" { - version = "1.4.11" - constraints = "~> 1.4.9" - hashes = [ - "h1:ElEvgyMfWoWyQbB6c51rGTjQlZKWf3QOvf5NhX/Vuyw=", - "zh:150d0ab25241a42f2ac5008878e0106c0887eec15181a40bee1433b87f01b8ed", - "zh:1d4ccda0729f12060e7f4ce5c6d83042d6d38ba2e546b68722ccb74832793b0c", - "zh:2964652181f59097aa1126f4b215b9232702b1a56df3e017e6b5683d5615714b", - "zh:42843e68bca24280e84ec600ae5d8f085fb26bdcdb4c0ccff2139ed81e1cb8c1", - "zh:4c6d90d40a360d84bc84c9af35c64056237537fa0f8118bf890fcf4e71f7b0f6", - "zh:51771ce42a99d7d4f5a4306367eee4cea72391c07f1f1c55c3c4a5c6a9eca53d", - "zh:6ab2389f1be6bb39d4966c253bf4fc77348e90f7e07ed2abb5ec5c90a4bbb615", - "zh:9b109254ea7ca6a5b0ede33b406cf5fed779f05672891bbd1cc3255c9cb17663", - "zh:a38c929d4fd03193cce94178c0fbaa1f7f09e93223ac71dc77c834d429b1c7c9", - "zh:bdc9bc10a1ecb5ae3da651df1709bf9d5474f25e310b73bdf32c86417674d32b", - ] -} - -provider "registry.terraform.io/hashicorp/template" { - version = "2.1.2" - constraints = "~> 2.1.2" - hashes = [ - "h1:8NcPRk3yxQtUlAT/YGfjBEJ76rQI2ljARYeIEjhtWho=", - "zh:149e4bf47ac21b67f6567767afcd29caaf0b0ca43714748093a00a2a98cd17a8", - "zh:2ff61a5eb7550e0df2baefccea78a8b621faef76154aad7ddf9c85c1d69f7ebf", - "zh:3b2d9a9f80754eb0a250a80e0dfdef385501697850a54ead744d1615e60fe648", - "zh:545b93c818035aac59f4a821644276c123a74aa210b1221974d832a6009df201", - "zh:5508512a522152a302591b399512fa736d8f57088c85ca74f7e00014db3a8c26", - "zh:701b56016a6db814ade171877375a2429b45979f97c2d112e4f2103f0433eb08", - "zh:90fc08165958538d8a099f17282c615d5b13f86bb215af33e2ca7551bf81996f", - "zh:affa6d409060c01a610854a395970d76701d0b07696e1ed6776b3f3b58014104", - "zh:b66ffed670bf0ed6714fa4ac26444a8e22f71ec6da134faf0b1f77fb2c13c666", - "zh:bb3d87db22f0ac56717eadde39690e3e27c1c01b10d0ecbe2e6e39f1e5c4d808", - "zh:c54b9693c9f348591432aabc808cbe1786bcda1cb70d312ef62a24545a14f945", - "zh:e7c8f8506cee5fa28f842714857d412a2b09e61127a0efe2a164c2f3d9bf2619", - ] -} - -provider "registry.terraform.io/hashicorp/vault" { - version = "2.16.0" - constraints = ">= 2.14.0" - hashes = [ - "h1:h27r8aZ5nwRfEelTQnJoA8s3TndJYPI7+3Df1DXIhXk=", - "zh:13dde74fac618ee0281bad60a60966a85d4a59c8420b15fd6499996fa1bc99b3", - "zh:1daad9da6c82f43cbd07bf1cfedf3c6960fb2f96bc59f94fd75d361065b8c51a", - "zh:68075d8e1824b745267ce9e4ef693b202b9282561811de6ccf7298935f482128", - "zh:86df4a4405413d575cd72985483163e62539afbd659fddef59fc637875b707e2", - "zh:8f8306ada4c1c44945ce5205e4f1cfbf5e3d46a9da2f3a1d0be17d32e4935845", - "zh:9eb75febcd6fcca9885a6f5e93293a200b2effbe31f47d265cc4d1346d42d29e", - "zh:a658b55b239bc7ad59a2bf55e7abbfe5f0111d37dd68b5d4bb947eee93969092", - "zh:af10679c241bd0e0168f57c24e839fd24c747f3e84b7bb6de3cd791471709249", - "zh:ee3030f36846de45450be088aa4c2b1f69246b2ecf40d7ea6a15a7f09ac5e5d0", - "zh:efe6cc23f77336604358e627b0b565c1421a97376e510a9cdaaf849524944713", - ] -} diff --git a/terraform-ci-infra/1n_nmd/alertmanager/conf/nomad/alertmanager.hcl b/terraform-ci-infra/1n_nmd/alertmanager/conf/nomad/alertmanager.hcl deleted file mode 100644 index 6b0d669d0e..0000000000 --- a/terraform-ci-infra/1n_nmd/alertmanager/conf/nomad/alertmanager.hcl +++ /dev/null @@ -1,380 +0,0 @@ -job "${job_name}" { - # The "region" parameter specifies the region in which to execute the job. - # If omitted, this inherits the default region name of "global". - # region = "global" - # - # The "datacenters" parameter specifies the list of datacenters which should - # be considered when placing this task. This must be provided. - datacenters = "${datacenters}" - - # The "type" parameter controls the type of job, which impacts the scheduler's - # decision on placement. This configuration is optional and defaults to - # "service". For a full list of job types and their differences, please see - # the online documentation. - # - # For more information, please see the online documentation at: - # - # https://www.nomadproject.io/docs/jobspec/schedulers - # - type = "service" - - update { - # The "max_parallel" parameter specifies the maximum number of updates to - # perform in parallel. In this case, this specifies to update a single task - # at a time. - max_parallel = 1 - - health_check = "checks" - - # The "min_healthy_time" parameter specifies the minimum time the allocation - # must be in the healthy state before it is marked as healthy and unblocks - # further allocations from being updated. - min_healthy_time = "10s" - - # The "healthy_deadline" parameter specifies the deadline in which the - # allocation must be marked as healthy after which the allocation is - # automatically transitioned to unhealthy. Transitioning to unhealthy will - # fail the deployment and potentially roll back the job if "auto_revert" is - # set to true. - healthy_deadline = "3m" - - # The "progress_deadline" parameter specifies the deadline in which an - # allocation must be marked as healthy. The deadline begins when the first - # allocation for the deployment is created and is reset whenever an allocation - # as part of the deployment transitions to a healthy state. If no allocation - # transitions to the healthy state before the progress deadline, the - # deployment is marked as failed. - progress_deadline = "10m" - -%{ if use_canary } - # The "canary" parameter specifies that changes to the job that would result - # in destructive updates should create the specified number of canaries - # without stopping any previous allocations. Once the operator determines the - # canaries are healthy, they can be promoted which unblocks a rolling update - # of the remaining allocations at a rate of "max_parallel". - # - # Further, setting "canary" equal to the count of the task group allows - # blue/green deployments. When the job is updated, a full set of the new - # version is deployed and upon promotion the old version is stopped. - canary = 1 - - # Specifies if the job should auto-promote to the canary version when all - # canaries become healthy during a deployment. Defaults to false which means - # canaries must be manually updated with the nomad deployment promote - # command. - auto_promote = true - - # The "auto_revert" parameter specifies if the job should auto-revert to the - # last stable job on deployment failure. A job is marked as stable if all the - # allocations as part of its deployment were marked healthy. - auto_revert = true -%{ endif } - } - - # The reschedule stanza specifies the group's rescheduling strategy. If - # specified at the job level, the configuration will apply to all groups - # within the job. If the reschedule stanza is present on both the job and the - # group, they are merged with the group stanza taking the highest precedence - # and then the job. - reschedule { - delay = "30s" - delay_function = "constant" - unlimited = true - } - - # The "group" stanza defines a series of tasks that should be co-located on - # the same Nomad client. Any task within a group will be placed on the same - # client. - # - # For more information and examples on the "group" stanza, please see - # the online documentation at: - # - # https://www.nomadproject.io/docs/job-specification/group - # - group "prod-group1-${service_name}" { - # The "count" parameter specifies the number of the task groups that should - # be running under this group. This value must be non-negative and defaults - # to 1. - count = ${group_count} - - # The restart stanza configures a tasks's behavior on task failure. Restarts - # happen on the client that is running the task. - # - # https://www.nomadproject.io/docs/job-specification/restart - # - restart { - interval = "30m" - attempts = 40 - delay = "15s" - mode = "delay" - } - - # The constraint allows restricting the set of eligible nodes. Constraints - # may filter on attributes or client metadata. - # - # For more information and examples on the "volume" stanza, please see - # the online documentation at: - # - # https://www.nomadproject.io/docs/job-specification/constraint - # - constraint { - attribute = "$${attr.cpu.arch}" - operator = "!=" - value = "arm64" - } - - # The "task" stanza creates an individual unit of work, such as a Docker - # container, web application, or batch processing. - # - # For more information and examples on the "task" stanza, please see - # the online documentation at: - # - # https://www.nomadproject.io/docs/job-specification/task - # - task "prod-task1-${service_name}" { - # The "driver" parameter specifies the task driver that should be used to - # run the task. - driver = "exec" - - %{ if use_vault_provider } - vault { - policies = "${vault_kv_policy_name}" - } - %{ endif } - - # The "config" stanza specifies the driver configuration, which is passed - # directly to the driver to start the task. The details of configurations - # are specific to each driver, so please see specific driver - # documentation for more information. - config { - command = "local/alertmanager-${version}.linux-amd64/alertmanager" - args = [ - "--config.file=secrets/alertmanager.yml" - ] - } - - # The artifact stanza instructs Nomad to fetch and unpack a remote resource, - # such as a file, tarball, or binary. Nomad downloads artifacts using the - # popular go-getter library, which permits downloading artifacts from a - # variety of locations using a URL as the input source. - # - # For more information and examples on the "artifact" stanza, please see - # the online documentation at: - # - # https://www.nomadproject.io/docs/job-specification/artifact - # - artifact { - source = "${url}" - } - - # The "template" stanza instructs Nomad to manage a template, such as - # a configuration file or script. This template can optionally pull data - # from Consul or Vault to populate runtime configuration data. - # - # For more information and examples on the "template" stanza, please see - # the online documentation at: - # - # https://www.nomadproject.io/docs/job-specification/template - # - template { - change_mode = "noop" - change_signal = "SIGINT" - destination = "secrets/alertmanager.yml" - left_delimiter = "{{{" - right_delimiter = "}}}" - data = < ] -# -# # Certificate and key files for client cert authentication to the server. -# cert_file: -# key_file: -# -# # ServerName extension to indicate the name of the server. -# # http://tools.ietf.org/html/rfc4366#section-3.1 -# server_name: -# -# # Disable validation of the server certificate. -# insecure_skip_verify: true - -# The root route on which each incoming alert enters. -route: - receiver: '${slack_default_receiver}' - - # The labels by which incoming alerts are grouped together. For example, - # multiple alerts coming in for cluster=A and alertname=LatencyHigh would - # be batched into a single group. - # - # To aggregate by all possible labels use '...' as the sole label name. - # This effectively disables aggregation entirely, passing through all - # alerts as-is. This is unlikely to be what you want, unless you have - # a very low alert volume or your upstream notification system performs - # its own grouping. Example: group_by: [...] - group_by: ['alertname'] - - # When a new group of alerts is created by an incoming alert, wait at - # least 'group_wait' to send the initial notification. - # This way ensures that you get multiple alerts for the same group that start - # firing shortly after another are batched together on the first - # notification. - group_wait: 30s - - # When the first notification was sent, wait 'group_interval' to send a batch - # of new alerts that started firing for that group. - group_interval: 5m - - # If an alert has successfully been sent, wait 'repeat_interval' to - # resend them. - repeat_interval: 3h - - # All the above attributes are inherited by all child routes and can - # overwritten on each. - # The child route trees. - routes: - - match_re: - alertname: JenkinsJob.* - receiver: ${slack_jenkins_receiver} - routes: - - match: - severity: critical - receiver: '${slack_jenkins_receiver}' - - - match_re: - service: .* - receiver: ${slack_default_receiver} - routes: - - match: - severity: critical - receiver: '${slack_default_receiver}' - -# Inhibition rules allow to mute a set of alerts given that another alert is -# firing. -# We use this to mute any warning-level notifications if the same alert is -# already critical. -inhibit_rules: -- source_match: - severity: 'critical' - target_match: - severity: 'warning' - equal: ['alertname', 'instance'] - -receivers: -- name: '${slack_jenkins_receiver}' - slack_configs: - - api_url: 'https://hooks.slack.com/services/${slack_jenkins_api_key}' - channel: '#${slack_jenkins_channel}' - send_resolved: true - icon_url: https://avatars3.githubusercontent.com/u/3380462 - title: |- - [{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .CommonLabels.alertname }} for {{ .CommonLabels.job }} - {{- if gt (len .CommonLabels) (len .GroupLabels) -}} - {{" "}}( - {{- with .CommonLabels.Remove .GroupLabels.Names }} - {{- range $index, $label := .SortedPairs -}} - {{ if $index }}, {{ end }} - {{- $label.Name }}="{{ $label.Value -}}" - {{- end }} - {{- end -}} - ) - {{- end }} - text: >- - {{ range .Alerts -}} - *Alert:* {{ .Annotations.summary }}{{ if .Labels.severity }} - `{{ .Labels.severity }}`{{ end }} - - *Description:* {{ .Annotations.description }} - - *Details:* - {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}` - {{ end }} - {{ end }} - -- name: '${slack_default_receiver}' - slack_configs: - - api_url: 'https://hooks.slack.com/services/${slack_default_api_key}' - channel: '#${slack_default_channel}' - send_resolved: true - icon_url: https://avatars3.githubusercontent.com/u/3380462 - title: |- - [{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .CommonLabels.alertname }} for {{ .CommonLabels.job }} - {{- if gt (len .CommonLabels) (len .GroupLabels) -}} - {{" "}}( - {{- with .CommonLabels.Remove .GroupLabels.Names }} - {{- range $index, $label := .SortedPairs -}} - {{ if $index }}, {{ end }} - {{- $label.Name }}="{{ $label.Value -}}" - {{- end }} - {{- end -}} - ) - {{- end }} - text: >- - {{ range .Alerts -}} - *Alert:* {{ .Annotations.summary }}{{ if .Labels.severity }} - `{{ .Labels.severity }}`{{ end }} - - *Description:* {{ .Annotations.description }} - - *Details:* - {{ range .Labels.SortedPairs }} • *{{ .Name }}:* `{{ .Value }}` - {{ end }} - {{ end }} -EOH - } - - # The service stanza instructs Nomad to register a service with Consul. - # - # For more information and examples on the "task" stanza, please see - # the online documentation at: - # - # https://www.nomadproject.io/docs/job-specification/service - # - service { - name = "${service_name}" - port = "${service_name}" - tags = [ "${service_name}$${NOMAD_ALLOC_INDEX}" ] - check { - name = "Alertmanager Check Live" - type = "http" - path = "/-/healthy" - interval = "10s" - timeout = "2s" - } - } - - # The "resources" stanza describes the requirements a task needs to - # execute. Resource requirements include memory, network, cpu, and more. - # This ensures the task will execute on a machine that contains enough - # resource capacity. - # - # For more information and examples on the "resources" stanza, please see - # the online documentation at: - # - # https://www.nomadproject.io/docs/job-specification/resources - # - resources { - cpu = ${cpu} - memory = ${mem} - # The network stanza specifies the networking requirements for the task - # group, including the network mode and port allocations. When scheduling - # jobs in Nomad they are provisioned across your fleet of machines along - # with other jobs and services. Because you don't know in advance what host - # your job will be provisioned on, Nomad will provide your tasks with - # network configuration when they start up. - # - # For more information and examples on the "template" stanza, please see - # the online documentation at: - # - # https://www.nomadproject.io/docs/job-specification/network - # - network { - port "${service_name}" { - static = ${port} - } - } - } - } - } -} \ No newline at end of file diff --git a/terraform-ci-infra/1n_nmd/alertmanager/main.tf b/terraform-ci-infra/1n_nmd/alertmanager/main.tf deleted file mode 100644 index 9525aabc0c..0000000000 --- a/terraform-ci-infra/1n_nmd/alertmanager/main.tf +++ /dev/null @@ -1,40 +0,0 @@ -locals { - datacenters = join(",", var.nomad_datacenters) - - alertmanager_url = join("", - [ - "https://github.com", - "/prometheus/alertmanager/releases/download/", - "v${var.alertmanager_version}/", - "alertmanager-${var.alertmanager_version}.linux-amd64.tar.gz" - ] - ) -} - -data "template_file" "nomad_job_alertmanager" { - template = file("${path.module}/conf/nomad/alertmanager.hcl") - vars = { - datacenters = local.datacenters - url = local.alertmanager_url - job_name = var.alertmanager_job_name - use_canary = var.alertmanager_use_canary - group_count = var.alertmanager_group_count - service_name = var.alertmanager_service_name - use_vault_provider = var.alertmanager_vault_secret.use_vault_provider - version = var.alertmanager_version - cpu = var.alertmanager_cpu - mem = var.alertmanager_mem - port = var.alertmanager_port - slack_jenkins_api_key = var.alertmanager_slack_jenkins_api_key - slack_jenkins_channel = var.alertmanager_slack_jenkins_channel - slack_jenkins_receiver = var.alertmanager_slack_jenkins_receiver - slack_default_api_key = var.alertmanager_slack_default_api_key - slack_default_channel = var.alertmanager_slack_default_channel - slack_default_receiver = var.alertmanager_slack_default_receiver - } -} - -resource "nomad_job" "nomad_job_alertmanager" { - jobspec = data.template_file.nomad_job_alertmanager.rendered - detach = false -} \ No newline at end of file diff --git a/terraform-ci-infra/1n_nmd/alertmanager/variables.tf b/terraform-ci-infra/1n_nmd/alertmanager/variables.tf deleted file mode 100644 index ffedf24f3d..0000000000 --- a/terraform-ci-infra/1n_nmd/alertmanager/variables.tf +++ /dev/null @@ -1,102 +0,0 @@ -# Nomad -variable "nomad_datacenters" { - description = "Nomad data centers" - type = list(string) - default = [ "dc1" ] -} - -# Alermanager -variable "alertmanager_job_name" { - description = "Job name" - type = string - default = "alertmanager" -} - -variable "alertmanager_group_count" { - description = "Number of group instances" - type = number - default = 1 -} - -variable "alertmanager_service_name" { - description = "Service name" - type = string - default = "alertmanager" -} - -variable "alertmanager_version" { - description = "Version" - type = string - default = "0.21.0" -} - -variable "alertmanager_use_canary" { - description = "Uses canary deployment" - type = bool - default = false -} - -variable "alertmanager_vault_secret" { - description = "Set of properties to be able to fetch secret from vault" - type = object({ - use_vault_provider = bool, - vault_kv_policy_name = string, - vault_kv_path = string, - vault_kv_field_access_key = string, - vault_kv_field_secret_key = string - }) -} - -variable "alertmanager_cpu" { - description = "CPU allocation" - type = number - default = 1000 -} - -variable "alertmanager_mem" { - description = "RAM allocation" - type = number - default = 1024 -} - -variable "alertmanager_port" { - description = "TCP allocation" - type = number - default = 9093 -} - -variable "alertmanager_slack_jenkins_api_key" { - description = "Alertmanager jenkins slack API key" - type = string - default = "XXXXXXXXX/XXXXXXXXXXX/XXXXXXXXXXXXXXXXXXXXXXXX" -} - -variable "alertmanager_slack_jenkins_receiver" { - description = "Alertmanager jenkins slack receiver" - type = string - default = "jenkins-slack-receiver" -} - -variable "alertmanager_slack_jenkins_channel" { - description = "Alertmanager jenkins slack channel" - type = string - default = "jenkins-channel" -} - -variable "alertmanager_slack_default_api_key" { - description = "Alertmanager default slack API key" - type = string - default = "XXXXXXXXX/XXXXXXXXXXX/XXXXXXXXXXXXXXXXXXXXXXXX" -} - -variable "alertmanager_slack_default_receiver" { - description = "Alertmanager default slack receiver" - type = string - default = "default-slack-receiver" -} - -variable "alertmanager_slack_default_channel" { - description = "Alertmanager default slack channel" - type = string - default = "default-channel" -} \ No newline at end of file diff --git a/terraform-ci-infra/1n_nmd/grafana/conf/blackbox_exporter_http.json b/terraform-ci-infra/1n_nmd/grafana/conf/blackbox_exporter_http.json deleted file mode 100644 index f9df1b239e..0000000000 --- a/terraform-ci-infra/1n_nmd/grafana/conf/blackbox_exporter_http.json +++ /dev/null @@ -1,1030 +0,0 @@ -{ - "__inputs": [ - { - "name": "DS_PROMETHEUS", - "label": "signcl-prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "5.2.2" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "5.0.0" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "5.0.0" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "description": "Prometheus Blackbox Exporter Overview", - "editable": true, - "gnetId": 7587, - "graphTooltip": 0, - "id": null, - "iteration": 1534695504413, - "links": [], - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 138, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "probe_duration_seconds{instance=~\"$target\"}", - "format": "time_series", - "interval": "$interval", - "intervalFactor": 1, - "legendFormat": "{{ instance }}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Global Probe Duration", - "tooltip": { - "shared": true, - "sort": 1, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 - }, - "id": 15, - "panels": [], - "repeat": "target", - "title": "$target status", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "gridPos": { - "h": 6, - "w": 10, - "x": 4, - "y": 9 - }, - "id": 25, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "probe_http_duration_seconds{instance=~\"$target\"}", - "format": "time_series", - "interval": "$interval", - "intervalFactor": 1, - "legendFormat": "{{ phase }}", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "HTTP Duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "gridPos": { - "h": 6, - "w": 10, - "x": 14, - "y": 9 - }, - "id": 17, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "probe_duration_seconds{instance=~\"$target\"}", - "format": "time_series", - "interval": "$interval", - "intervalFactor": 1, - "legendFormat": "seconds", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Probe Duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "${DS_PROMETHEUS}", - "decimals": 0, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 2, - "w": 4, - "x": 0, - "y": 11 - }, - "id": 20, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 3, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "repeat": null, - "repeatDirection": "h", - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "probe_http_status_code{instance=~\"$target\"}", - "format": "time_series", - "interval": "$interval", - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": "201, 399", - "title": "HTTP Status Code", - "transparent": false, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - }, - { - "op": "=", - "text": "YES", - "value": "1" - }, - { - "op": "=", - "text": "N/A", - "value": "0" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "${DS_PROMETHEUS}", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 2, - "w": 4, - "x": 0, - "y": 13 - }, - "id": 27, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "probe_http_version{instance=~\"$target\"}", - "format": "time_series", - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": "", - "title": "HTTP Version", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": true, - "colors": [ - "#d44a3a", - "rgba(237, 129, 40, 0.89)", - "#299c46" - ], - "datasource": "${DS_PROMETHEUS}", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 2, - "w": 4, - "x": 0, - "y": 15 - }, - "id": 18, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 3, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "repeat": null, - "repeatDirection": "v", - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "probe_http_ssl{instance=~\"$target\"}", - "format": "time_series", - "interval": "$interval", - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": "0, 1", - "title": "SSL", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - }, - { - "op": "=", - "text": "YES", - "value": "1" - }, - { - "op": "=", - "text": "NO", - "value": "0" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": true, - "colors": [ - "#d44a3a", - "rgba(237, 129, 40, 0.89)", - "#299c46" - ], - "datasource": "${DS_PROMETHEUS}", - "decimals": 2, - "format": "dtdurations", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 2, - "w": 10, - "x": 4, - "y": 15 - }, - "id": 19, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 3, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "repeat": null, - "repeatDirection": "h", - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "probe_ssl_earliest_cert_expiry{instance=~\"$target\"} - time()", - "format": "time_series", - "interval": "$interval", - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": "0,1209600", - "timeFrom": null, - "title": "SSL Expiry", - "transparent": false, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - }, - { - "op": "=", - "text": "YES", - "value": "1" - }, - { - "op": "=", - "text": "NO", - "value": "0" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "${DS_PROMETHEUS}", - "format": "s", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 2, - "w": 5, - "x": 14, - "y": 15 - }, - "id": 23, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "repeat": null, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "avg(probe_duration_seconds{instance=~\"$target\"})", - "format": "time_series", - "interval": "$interval", - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": "", - "title": "Average Probe Duration", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "${DS_PROMETHEUS}", - "format": "s", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 2, - "w": 5, - "x": 19, - "y": 15 - }, - "id": 24, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "repeat": null, - "repeatDirection": "h", - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "avg(probe_dns_lookup_time_seconds{instance=~\"$target\"})", - "format": "time_series", - "interval": "$interval", - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": "", - "title": "Average DNS Lookup", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - } - ], - "refresh": "10s", - "schemaVersion": 16, - "style": "dark", - "tags": [ - "blackbox", - "prometheus" - ], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "default", - "value": "default" - }, - "hide": 0, - "includeAll": false, - "label": "datasource", - "multi": false, - "name": "DS_PROMETHEUS", - "options": [], - "query": "prometheus", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "auto": true, - "auto_count": 10, - "auto_min": "10s", - "current": { - "text": "10s", - "value": "10s" - }, - "hide": 0, - "label": "Interval", - "name": "interval", - "options": [ - { - "selected": false, - "text": "auto", - "value": "$__auto_interval_interval" - }, - { - "selected": false, - "text": "5s", - "value": "5s" - }, - { - "selected": true, - "text": "10s", - "value": "10s" - }, - { - "selected": false, - "text": "30s", - "value": "30s" - }, - { - "selected": false, - "text": "1m", - "value": "1m" - }, - { - "selected": false, - "text": "10m", - "value": "10m" - }, - { - "selected": false, - "text": "30m", - "value": "30m" - }, - { - "selected": false, - "text": "1h", - "value": "1h" - }, - { - "selected": false, - "text": "6h", - "value": "6h" - }, - { - "selected": false, - "text": "12h", - "value": "12h" - }, - { - "selected": false, - "text": "1d", - "value": "1d" - }, - { - "selected": false, - "text": "7d", - "value": "7d" - }, - { - "selected": false, - "text": "14d", - "value": "14d" - }, - { - "selected": false, - "text": "30d", - "value": "30d" - } - ], - "query": "5s,10s,30s,1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", - "refresh": 2, - "type": "interval" - }, - { - "allValue": null, - "current": {}, - "datasource": "${DS_PROMETHEUS}", - "hide": 0, - "includeAll": true, - "label": null, - "multi": true, - "name": "target", - "options": [], - "query": "label_values(probe_success, instance)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-4h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "HTTP Exporter", - "version": 1 -} \ No newline at end of file diff --git a/terraform-ci-infra/1n_nmd/grafana/conf/blackbox_exporter_icmp.json b/terraform-ci-infra/1n_nmd/grafana/conf/blackbox_exporter_icmp.json deleted file mode 100644 index df30506348..0000000000 --- a/terraform-ci-infra/1n_nmd/grafana/conf/blackbox_exporter_icmp.json +++ /dev/null @@ -1,368 +0,0 @@ -{ - "__inputs": [ - { - "name": "DS_PROMETHEUS", - "label": "localhost", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "6.5.2" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "" - }, - { - "type": "panel", - "id": "heatmap", - "name": "Heatmap", - "version": "" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "gnetId": 12412, - "graphTooltip": 0, - "id": null, - "iteration": 1591284149575, - "links": [], - "panels": [ - { - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateRdYlGn", - "exponent": 0.5, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_PROMETHEUS}", - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 0 - }, - "heatmap": {}, - "hideZeroBuckets": false, - "highlightCards": true, - "id": 7, - "legend": { - "show": true - }, - "options": {}, - "reverseYBuckets": true, - "targets": [ - { - "expr": "sum(probe_icmp_duration_seconds{phase=\"rtt\"}) by (instance)", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ICMP RTT", - "tooltip": { - "show": true, - "showHistogram": true - }, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": null, - "format": "s", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "middle", - "yBucketNumber": null, - "yBucketSize": null - }, - { - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateRdYlGn", - "exponent": 0.5, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": "${DS_PROMETHEUS}", - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 8 - }, - "heatmap": {}, - "hideZeroBuckets": false, - "highlightCards": true, - "id": 8, - "legend": { - "show": true - }, - "options": {}, - "reverseYBuckets": true, - "targets": [ - { - "expr": "1-avg_over_time(probe_success{instance=~\"$instance\"}[$__interval])", - "format": "time_series", - "hide": false, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ICMP packet loss", - "tooltip": { - "show": true, - "showHistogram": true - }, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": null, - "xBucketSize": null, - "yAxis": { - "decimals": null, - "format": "percentunit", - "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null - }, - "yBucketBound": "middle", - "yBucketNumber": null, - "yBucketSize": null - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": null, - "description": "This uses the blackbox exporter, which does not expose packet loss, for example. It could be improved with https://github.com/SuperQ/smokeping_prober because it also keeps track of lost samples (https://github.com/SuperQ/smokeping_prober/issues/24). Unfortunately, that still won't make graphs as nice as smokeping, because each probe only keeps one sample, instead of doing multiple like smokeping does (https://github.com/SuperQ/smokeping_prober/issues/36).", - "fill": 0, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 16 - }, - "hiddenSeries": false, - "id": 2, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": false, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "maxPerRow": 2, - "nullPointMode": "connected", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 0.5, - "points": false, - "renderer": "flot", - "repeat": "instance", - "repeatDirection": "v", - "seriesOverrides": [ - { - "alias": "packet loss", - "color": "#C4162A", - "lines": false, - "pointradius": 1, - "points": true, - "yaxis": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": true, - "targets": [ - { - "expr": "sum(probe_icmp_duration_seconds{phase=\"rtt\",instance=~\"$instance\"}) by (instance) > 0", - "instant": false, - "legendFormat": "RTT", - "refId": "A" - }, - { - "expr": "1-avg_over_time(probe_success{instance=~\"$instance\"}[$__interval])", - "format": "time_series", - "legendFormat": "packet loss", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "ICMP round trip time ($instance)", - "tooltip": { - "shared": true, - "sort": 1, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "dtdurations", - "label": "RTT", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "percentunit", - "label": "packet loss", - "logBase": 1, - "max": "1", - "min": "0.0001", - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "refresh": false, - "schemaVersion": 21, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "default", - "value": "default" - }, - "hide": 0, - "includeAll": false, - "label": "datasource", - "multi": false, - "name": "DS_PROMETHEUS", - "options": [], - "query": "prometheus", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "allValue": null, - "current": {}, - "datasource": "${DS_PROMETHEUS}", - "definition": "label_values(probe_success, instance)", - "hide": 0, - "includeAll": true, - "label": null, - "multi": true, - "name": "instance", - "options": [], - "query": "label_values(probe_success, instance)", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-4h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ] - }, - "timezone": "", - "title": "ICMP exporter", - "version": 1, - "description": "Graph ICMP metrics from the blackbox exporter, Smokeping-style" -} \ No newline at end of file diff --git a/terraform-ci-infra/1n_nmd/grafana/conf/consul.json b/terraform-ci-infra/1n_nmd/grafana/conf/consul.json deleted file mode 100644 index 2e4a36f076..0000000000 --- a/terraform-ci-infra/1n_nmd/grafana/conf/consul.json +++ /dev/null @@ -1,1438 +0,0 @@ -{ - "__inputs": [ - { - "name": "DS_PROMETHEUS", - "label": "Prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "4.3.0-beta1" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" - }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "" - } - ], - "annotations": { - "list": [] - }, - "editable": true, - "gnetId": 2351, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [], - "rows": [ - { - "collapse": false, - "height": 153, - "panels": [ - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "${DS_PROMETHEUS}", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 1, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "consul_raft_leader_lastcontact_count", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{host}}", - "refId": "A", - "step": 60 - } - ], - "thresholds": "", - "title": "Consul Leader", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "name" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "${DS_PROMETHEUS}", - "format": "none", - "gauge": { - "maxValue": 3, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 17, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "COUNT (changes(consul_memberlist_gossep_sum[1m]) > 0) BY (labels)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 60 - } - ], - "thresholds": "1,2", - "title": "# servers in cluster", - "type": "singlestat", - "valueFontSize": "100%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "${DS_PROMETHEUS}", - "decimals": null, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 18, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(irate(node_cpu{mode=\"idle\", host=\"$consul\"}[1m])) * 100 / count_scalar(node_cpu{mode=\"user\", host=\"$consul\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 60 - } - ], - "thresholds": "", - "title": "CPU Idle", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "${DS_PROMETHEUS}", - "format": "none", - "gauge": { - "maxValue": 4, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 14, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "node_load1{host=\"$consul\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 60 - } - ], - "thresholds": "1,2", - "title": "Load 1", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "${DS_PROMETHEUS}", - "format": "none", - "gauge": { - "maxValue": 4, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 15, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "node_load5{host=\"$consul\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 60 - } - ], - "thresholds": "1,2", - "title": "Load 5", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "${DS_PROMETHEUS}", - "format": "none", - "gauge": { - "maxValue": 4, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "id": 16, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 2, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "node_load15{host=\"$consul\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 60 - } - ], - "thresholds": "1,2", - "title": "Load 15", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6" - }, - { - "collapse": false, - "height": 250, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "The amount of TCP messages that are sent/received from the server.", - "fill": 1, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(consul_memberlist_tcp{host=\"$consul\"}[1m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{type}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Memberlist TCP Messages", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "The amount of UDP messages that are sent/received from the server.", - "fill": 1, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(consul_memberlist_udp{host=\"$consul\"}[1m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{type}}", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Memberlist UDP Messages", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6" - }, - { - "collapse": false, - "height": 250, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "This measures the time it takes to replicate log entries to followers. This is a general indicator of the load pressure on the Consul servers, as well as the performance of the communication between the servers.", - "fill": 1, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "consul_raft_replication_appendEntries_rpc", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{query}} - {{quantile}}%", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Log replication from leader to servers", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "consul_raft_replication_heartbeat", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{query}} - {{quantile}}%", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "consul_raft_replication_heartbeat", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6" - }, - { - "collapse": false, - "height": 250, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "This measures the time it takes for the leader to write log entries to disk.", - "fill": 1, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "consul_raft_leader_dispatchLog", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{quantile}}%", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Write logs", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "This measures the time it takes to commit a new entry to the Raft log on the leader.", - "fill": 1, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "consul_raft_commitTime", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{quantile}}%", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Commit time Leader", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6" - }, - { - "collapse": false, - "height": 250, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "This counts the number of Raft transactions occurring over the interval, which is a general indicator of the write load on the Consul servers.", - "fill": 1, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "delta(consul_raft_apply[30s])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Transactions", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Raft Transactions", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "This will only be emitted by the Raft leader and measures the time since the leader was last able to contact the follower nodes when checking its leader lease. It can be used as a measure for how stable the Raft timing is and how close the leader is to timing out its lease.\n\nThe lease timeout is 500 ms times the raft_multiplier configuration, so this telemetry value should not be getting close to that configured value, otherwise the Raft timing is marginal and might need to be tuned, or more powerful servers might be needed. See the Server Performance guide for more details.", - "fill": 1, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "consul_raft_leader_lastcontact", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{quantile}}%", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Leader lastContact", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6" - }, - { - "collapse": false, - "height": 250, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "delta(consul_rpc_query{host=\"$consul\"}[30s])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Requests", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "RPC Requests", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "Consul uses a network tomography system to compute network coordinates for nodes in the cluster. These coordinates allow the network round trip time to be estimated between any two nodes using a very simple calculation. This allows for many useful applications, such as finding the service node nearest a requesting node, or failing over to services in the next closest datacenter.", - "fill": 1, - "id": 13, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "consul_serf_coordinate_adjustment_ms{host=\"$consul\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{quantile}}%", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Serf Coordinates", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "default", - "value": "default" - }, - "hide": 0, - "includeAll": false, - "label": "datasource", - "multi": false, - "name": "DS_PROMETHEUS", - "options": [], - "query": "prometheus", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "allValue": null, - "current": {}, - "datasource": "${DS_PROMETHEUS}", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "consul", - "options": [], - "query": "label_values(consul_memberlist_gossep_sum, host)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-4h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Consul", - "version": 1 -} \ No newline at end of file diff --git a/terraform-ci-infra/1n_nmd/grafana/conf/docker_cadvisor.json b/terraform-ci-infra/1n_nmd/grafana/conf/docker_cadvisor.json deleted file mode 100644 index bbad614bb4..0000000000 --- a/terraform-ci-infra/1n_nmd/grafana/conf/docker_cadvisor.json +++ /dev/null @@ -1,2040 +0,0 @@ -{ - "__inputs": [ - { - "name": "DS_PROMETHEUS", - "label": "Prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "6.2.4" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" - }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "" - }, - { - "type": "panel", - "id": "table", - "name": "Table", - "version": "" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "description": "A simple overview of the most important Docker host and container metrics. (cAdvisor/Prometheus)", - "editable": true, - "gnetId": 10657, - "graphTooltip": 1, - "id": null, - "iteration": 1564715574785, - "links": [], - "panels": [ - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "${DS_PROMETHEUS}", - "decimals": 0, - "editable": true, - "error": false, - "format": "s", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 0, - "y": 0 - }, - "height": "", - "id": 24, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "options": {}, - "postfix": "", - "postfixFontSize": "30%", - "prefix": "", - "prefixFontSize": "20%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "time() - node_boot_time_seconds{instance=~\"$node:.*\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 1800 - } - ], - "thresholds": "", - "title": "Uptime", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 4, - "y": 0 - }, - "id": 31, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "options": {}, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "count(container_last_seen{instance=~\"$node:$port\",job=~\"$job\",image!=\"\"})", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 1800 - } - ], - "thresholds": "", - "title": "Containers", - "type": "singlestat", - "valueFontSize": "120%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "${DS_PROMETHEUS}", - "decimals": 0, - "editable": true, - "error": false, - "format": "decbytes", - "gauge": { - "maxValue": 500000000, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 8, - "y": 0 - }, - "id": 30, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "options": {}, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "(node_memory_SwapTotal_bytes{instance=~'$node:9100'} - node_memory_SwapFree_bytes{instance=~'$node:9100'})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 1800 - } - ], - "thresholds": "400000000", - "title": "Swap", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "${DS_PROMETHEUS}", - "decimals": 0, - "editable": true, - "error": false, - "format": "percentunit", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 12, - "y": 0 - }, - "id": 27, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "options": {}, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(50, 189, 31, 0.18)", - "full": false, - "lineColor": "rgb(69, 193, 31)", - "show": true - }, - "tableColumn": "", - "targets": [ - { - "expr": "node_load1{instance=~\"$node:9100\"} / count by(job, instance)(count by(job, instance, cpu)(node_cpu_seconds_total{instance=~\"$node:9100\"}))", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 1800 - } - ], - "thresholds": "0.8,0.9", - "title": "Load", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 10000000000 - ], - "type": "gt" - }, - "query": { - "params": [ - "A", - "5m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "Available Memory alert", - "noDataState": "keep_state", - "notifications": [ - { - "id": 1 - } - ] - }, - "aliasColors": { - "Available Memory": "#7EB26D", - "Unavailable Memory": "#7EB26D" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "gridPos": { - "h": 10, - "w": 4, - "x": 16, - "y": 0 - }, - "id": 20, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", - "options": {}, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "container_memory_rss{name=~\".+\"}", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{__name__}}", - "refId": "D", - "step": 20 - }, - { - "expr": "sum(container_memory_rss{name=~\".+\"})", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{__name__}}", - "refId": "A", - "step": 20 - }, - { - "expr": "container_memory_usage_bytes{name=~\".+\"}", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{name}}", - "refId": "B", - "step": 20 - }, - { - "expr": "container_memory_rss{id=\"/\"}", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{__name__}}", - "refId": "C", - "step": 20 - }, - { - "expr": "sum(container_memory_rss)", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{__name__}}", - "refId": "E", - "step": 20 - }, - { - "expr": "node_memory_Buffers", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "node_memory_Dirty", - "refId": "N", - "step": 30 - }, - { - "expr": "node_memory_MemFree", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{__name__}}", - "refId": "F", - "step": 20 - }, - { - "expr": "node_memory_MemAvailable", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "Available Memory", - "refId": "H", - "step": 20 - }, - { - "expr": "node_memory_MemTotal_bytes{instance=~\"$node:9100\"} - node_memory_MemAvailable_bytes{instance=~\"$node:9100\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Unavailable Memory", - "refId": "G", - "step": 600 - }, - { - "expr": "node_memory_Inactive", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{__name__}}", - "refId": "I", - "step": 30 - }, - { - "expr": "node_memory_KernelStack", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{__name__}}", - "refId": "J", - "step": 30 - }, - { - "expr": "node_memory_Active", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{__name__}}", - "refId": "K", - "step": 30 - }, - { - "expr": "node_memory_MemTotal - (node_memory_Active + node_memory_MemFree + node_memory_Inactive)", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "Unknown", - "refId": "L", - "step": 40 - }, - { - "expr": "node_memory_MemFree + node_memory_Inactive ", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{__name__}}", - "refId": "M", - "step": 30 - }, - { - "expr": "container_memory_rss{name=~\".+\"}", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{__name__}}", - "refId": "O", - "step": 30 - }, - { - "expr": "node_memory_Inactive + node_memory_MemFree + node_memory_MemAvailable", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "P", - "step": 40 - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 10000000000, - "yaxis": "left" - } - ], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Available Memory", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": false, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "", - "logBase": 1, - "max": 16000000000, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 850000000000 - ], - "type": "gt" - }, - "query": { - "params": [ - "A", - "5m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "Free/Used Disk Space alert", - "noDataState": "keep_state", - "notifications": [ - { - "id": 1 - } - ] - }, - "aliasColors": { - "Belegete Festplatte": "#BF1B00", - "Free Disk Space": "#7EB26D", - "Used Disk Space": "#7EB26D", - "{}": "#BF1B00" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "gridPos": { - "h": 10, - "w": 4, - "x": 20, - "y": 0 - }, - "id": 13, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", - "options": {}, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Used Disk Space", - "yaxis": 1 - } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "node_filesystem_size_bytes{fstype=\"rootfs\"} - node_filesystem_free_bytes{fstype=\"rootfs\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Used Disk Space", - "refId": "A", - "step": 600 - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 850000000000 - } - ], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Used Disk Space", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": false, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "", - "logBase": 1, - "max": 1000000000000, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "SENT": "#BF1B00" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "gridPos": { - "h": 6, - "w": 4, - "x": 0, - "y": 4 - }, - "id": 19, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", - "options": {}, - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(container_network_receive_bytes_total{id=\"/\"}[$interval])) by (id)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "RECEIVED", - "refId": "A", - "step": 600 - }, - { - "expr": "- sum(rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])) by (id)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "SENT", - "refId": "B", - "step": 600 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Network Traffic", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": false, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "${DS_PROMETHEUS}", - "decimals": 0, - "editable": true, - "error": false, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 4, - "y": 4 - }, - "id": 25, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "options": {}, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "((node_memory_MemTotal_bytes{instance=~\"$node:9100\"} - node_memory_MemAvailable_bytes{instance=~\"$node:9100\"}) / node_memory_MemTotal_bytes{instance=~\"$node:9100\"}) * 100", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 1800 - } - ], - "thresholds": "70, 90", - "title": "Memory", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "aliasColors": { - "{id=\"/\",instance=\"cadvisor:8080\",job=\"prometheus\"}": "#BA43A9" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "gridPos": { - "h": 6, - "w": 4, - "x": 8, - "y": 4 - }, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", - "options": {}, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(container_cpu_system_seconds_total[1m]))", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "a", - "refId": "B", - "step": 120 - }, - { - "expr": "sum(rate(container_cpu_system_seconds_total{name=~\".+\"}[1m]))", - "format": "time_series", - "hide": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "nur container", - "refId": "F", - "step": 10 - }, - { - "expr": "sum(rate(container_cpu_system_seconds_total{id=\"/\"}[1m]))", - "format": "time_series", - "hide": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "nur docker host", - "metric": "", - "refId": "A", - "step": 20 - }, - { - "expr": "sum(rate(process_cpu_seconds_total[$interval])) * 100", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "host", - "metric": "", - "refId": "C", - "step": 600 - }, - { - "expr": "sum(rate(container_cpu_system_seconds_total{name=~\".+\"}[1m])) + sum(rate(container_cpu_system_seconds_total{id=\"/\"}[1m])) + sum(rate(process_cpu_seconds_total[1m]))", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 120 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "CPU Usage", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": false, - "values": [] - }, - "yaxes": [ - { - "format": "percent", - "label": "", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "IN on /sda": "#7EB26D", - "OUT on /sda": "#890F02" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "gridPos": { - "h": 6, - "w": 4, - "x": 12, - "y": 4 - }, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", - "options": {}, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "-sum(rate(node_disk_read_bytes_total[$interval])) by (device)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "OUT on /{{device}}", - "metric": "node_disk_bytes_read", - "refId": "A", - "step": 600 - }, - { - "expr": "sum(rate(node_disk_written_bytes_total[$interval])) by (device)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "IN on /{{device}}", - "metric": "", - "refId": "B", - "step": 600 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Disk I/O", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": false, - "values": [] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 10 - }, - "id": 8, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "null as zero", - "options": {}, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(container_network_receive_bytes_total{instance=~\"$node:$port\",job=~\"$job\",image!=\"\"}[$interval])) by (name)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{name}}", - "refId": "A", - "step": 240 - }, - { - "expr": "- rate(container_network_transmit_bytes_total{name=~\".+\"}[$interval])", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{name}}", - "refId": "B", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Received Network Traffic per Container", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 1, - "grid": {}, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 10 - }, - "id": 9, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "null as zero", - "options": {}, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(container_network_transmit_bytes_total{instance=~\"$node:$port\",job=~\"$job\",image!=\"\"}[$interval])) by (name)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{name}}", - "refId": "A", - "step": 240 - }, - { - "expr": "rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Sent Network Traffic per Container", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "Bps", - "label": "", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": "", - "logBase": 10, - "max": 8, - "min": 0, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 5, - "grid": {}, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 17 - }, - "id": 1, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", - "options": {}, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(container_cpu_usage_seconds_total{instance=~\"$node:$port\",job=~\"$job\",image!=\"\"}[$interval])) by (name) * 100", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{name}}", - "metric": "", - "refId": "F", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "CPU Usage per Container", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percent", - "label": "", - "logBase": 1, - "max": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 3, - "grid": {}, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 17 - }, - "id": 34, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "null as zero", - "options": {}, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(container_memory_swap{instance=~\"$node:$port\",job=~\"$job\",image!=\"\"}) by (name)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{name}}", - "refId": "A", - "step": 240 - }, - { - "expr": "container_memory_usage_bytes{name=~\".+\"}", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{name}}", - "refId": "B", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory Swap per Container", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "editable": true, - "error": false, - "fill": 3, - "grid": {}, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 24 - }, - "id": 10, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "null as zero", - "options": {}, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(container_memory_rss{instance=~\"$node:$port\",job=~\"$job\",image!=\"\"}) by (name)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{name}}", - "refId": "A", - "step": 240 - }, - { - "expr": "container_memory_usage_bytes{name=~\".+\"}", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{name}}", - "refId": "B", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory Usage per Container", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "columns": [ - { - "text": "Current", - "value": "current" - } - ], - "editable": true, - "error": false, - "fontSize": "100%", - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 24 - }, - "id": 36, - "links": [], - "options": {}, - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 0, - "desc": true - }, - "styles": [ - { - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [ - "10000000", - " 25000000" - ], - "type": "number", - "unit": "decbytes" - } - ], - "targets": [ - { - "expr": "sum(container_spec_memory_limit_bytes{name=~\".+\"} - container_memory_usage_bytes{name=~\".+\"}) by (name) ", - "format": "table", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{name}}", - "metric": "", - "refId": "A", - "step": 240 - }, - { - "expr": "sum(container_spec_memory_limit_bytes{instance=~\"$node:$port\",job=~\"$job\",image!=\"\"}) by (name) ", - "format": "table", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{name}}", - "refId": "B", - "step": 240 - }, - { - "expr": "container_memory_usage_bytes{name=~\".+\"}", - "format": "table", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{name}}", - "refId": "C", - "step": 240 - } - ], - "title": "Limit memory", - "transform": "table", - "type": "table" - } - ], - "refresh": "5m", - "schemaVersion": 18, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "default", - "value": "default" - }, - "hide": 0, - "includeAll": false, - "label": "datasource", - "multi": false, - "name": "DS_PROMETHEUS", - "options": [], - "query": "prometheus", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "allValue": null, - "current": {}, - "datasource": "${DS_PROMETHEUS}", - "definition": "label_values(container_cpu_user_seconds_total, job)", - "hide": 0, - "includeAll": false, - "label": "Job", - "multi": false, - "name": "job", - "options": [], - "query": "label_values(container_cpu_user_seconds_total, job)", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": {}, - "datasource": "${DS_PROMETHEUS}", - "definition": "label_values(container_cpu_user_seconds_total{job=~\"$job\"}, instance)", - "hide": 0, - "includeAll": false, - "label": "Host:", - "multi": false, - "name": "node", - "options": [], - "query": "label_values(container_cpu_user_seconds_total{job=~\"$job\"}, instance)", - "refresh": 1, - "regex": "/([^:]+):.*/", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": null, - "tags": [], - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": {}, - "datasource": "${DS_PROMETHEUS}", - "definition": "label_values(container_cpu_user_seconds_total{instance=~\"$node:(.*)\"}, instance)", - "hide": 0, - "includeAll": false, - "label": "Port", - "multi": false, - "name": "port", - "options": [], - "query": "label_values(container_cpu_user_seconds_total{instance=~\"$node:(.*)\"}, instance)", - "refresh": 1, - "regex": "/[^:]+:(.*)/", - "skipUrlSync": false, - "sort": 3, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "auto": true, - "auto_count": 30, - "auto_min": "50s", - "current": { - "text": "1m", - "value": "1m" - }, - "hide": 0, - "label": "Interval", - "name": "interval", - "options": [ - { - "selected": false, - "text": "auto", - "value": "$__auto_interval_interval" - }, - { - "selected": true, - "text": "1m", - "value": "1m" - }, - { - "selected": false, - "text": "5m", - "value": "5m" - }, - { - "selected": false, - "text": "10m", - "value": "10m" - }, - { - "selected": false, - "text": "30m", - "value": "30m" - }, - { - "selected": false, - "text": "1h", - "value": "1h" - }, - { - "selected": false, - "text": "6h", - "value": "6h" - }, - { - "selected": false, - "text": "12h", - "value": "12h" - }, - { - "selected": false, - "text": "1d", - "value": "1d" - }, - { - "selected": false, - "text": "7d", - "value": "7d" - }, - { - "selected": false, - "text": "14d", - "value": "14d" - }, - { - "selected": false, - "text": "30d", - "value": "30d" - } - ], - "query": "1m,5m,10m,30m,1h,6h,12h,1d,7d,14d,30d", - "refresh": 2, - "skipUrlSync": false, - "type": "interval" - } - ] - }, - "time": { - "from": "now-4h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Docker cAdvisor", - "version": 1 -} diff --git a/terraform-ci-infra/1n_nmd/grafana/conf/node_exporter.json b/terraform-ci-infra/1n_nmd/grafana/conf/node_exporter.json deleted file mode 100644 index 766d5afec3..0000000000 --- a/terraform-ci-infra/1n_nmd/grafana/conf/node_exporter.json +++ /dev/null @@ -1,13696 +0,0 @@ -{ - "__inputs": [ - { - "name": "DS_PROMETHEUS", - "label": "Prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__requires": [ - { - "type": "panel", - "id": "gauge", - "name": "Gauge", - "version": "" - }, - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "6.7.3" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" - }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "" - } - ], - "annotations": { - "list": [ - { - "$$hashKey": "object:1058", - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "gnetId": 1860, - "graphTooltip": 0, - "id": null, - "iteration": 1595837627257, - "links": [], - "panels": [ - { - "collapsed": false, - "datasource": "${DS_PROMETHEUS}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 261, - "panels": [], - "repeat": null, - "title": "Quick CPU / Mem / Disk", - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Busy state of all CPU cores together", - "gridPos": { - "h": 4, - "w": 3, - "x": 0, - "y": 1 - }, - "id": 20, - "links": [], - "options": { - "fieldOptions": { - "calcs": [ - "lastNotNull" - ], - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "id": 0, - "op": "=", - "text": "N/A", - "type": 1, - "value": "null" - } - ], - "max": 100, - "min": 0, - "nullValueMode": "null", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 85 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 95 - } - ] - }, - "unit": "percent" - }, - "overrides": [], - "values": false - }, - "orientation": "horizontal", - "showThresholdLabels": false, - "showThresholdMarkers": true - }, - "pluginVersion": "6.7.3", - "targets": [ - { - "expr": "(((count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))) - avg(sum by (mode)(irate(node_cpu_seconds_total{mode='idle',instance=\"$node\",job=\"$job\"}[5m])))) * 100) / count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))", - "hide": false, - "intervalFactor": 1, - "legendFormat": "", - "refId": "A", - "step": 900 - } - ], - "title": "CPU Busy", - "type": "gauge" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Busy state of all CPU cores together (5 min average)", - "gridPos": { - "h": 4, - "w": 3, - "x": 3, - "y": 1 - }, - "id": 155, - "links": [], - "options": { - "fieldOptions": { - "calcs": [ - "lastNotNull" - ], - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "id": 0, - "op": "=", - "text": "N/A", - "type": 1, - "value": "null" - } - ], - "max": 100, - "min": 0, - "nullValueMode": "null", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 85 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 95 - } - ] - }, - "unit": "percent" - }, - "overrides": [], - "values": false - }, - "orientation": "horizontal", - "showThresholdLabels": false, - "showThresholdMarkers": true - }, - "pluginVersion": "6.7.3", - "targets": [ - { - "expr": "avg(node_load5{instance=\"$node\",job=\"$job\"}) / count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)) * 100", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "refId": "A", - "step": 900 - } - ], - "title": "Sys Load (5m avg)", - "type": "gauge" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Busy state of all CPU cores together (15 min average)", - "gridPos": { - "h": 4, - "w": 3, - "x": 6, - "y": 1 - }, - "id": 19, - "links": [], - "options": { - "fieldOptions": { - "calcs": [ - "lastNotNull" - ], - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "id": 0, - "op": "=", - "text": "N/A", - "type": 1, - "value": "null" - } - ], - "max": 100, - "min": 0, - "nullValueMode": "null", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 85 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 95 - } - ] - }, - "unit": "percent" - }, - "overrides": [], - "values": false - }, - "orientation": "horizontal", - "showThresholdLabels": false, - "showThresholdMarkers": true - }, - "pluginVersion": "6.7.3", - "targets": [ - { - "expr": "avg(node_load15{instance=\"$node\",job=\"$job\"}) / count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)) * 100", - "hide": false, - "intervalFactor": 1, - "refId": "A", - "step": 900 - } - ], - "title": "Sys Load (15m avg)", - "type": "gauge" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Non available RAM memory", - "gridPos": { - "h": 4, - "w": 3, - "x": 9, - "y": 1 - }, - "hideTimeOverride": false, - "id": 16, - "links": [], - "options": { - "fieldOptions": { - "calcs": [ - "lastNotNull" - ], - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "mappings": [], - "max": 100, - "min": 0, - "nullValueMode": "null", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 80 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 90 - } - ] - }, - "unit": "percent" - }, - "overrides": [], - "values": false - }, - "orientation": "horizontal", - "showThresholdLabels": false, - "showThresholdMarkers": true - }, - "pluginVersion": "6.7.3", - "targets": [ - { - "expr": "((node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"}) / (node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} )) * 100", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "refId": "A", - "step": 900 - }, - { - "expr": "100 - ((node_memory_MemAvailable_bytes{instance=\"$node\",job=\"$job\"} * 100) / node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"})", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "refId": "B", - "step": 900 - } - ], - "title": "RAM Used", - "type": "gauge" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Used Swap", - "gridPos": { - "h": 4, - "w": 3, - "x": 12, - "y": 1 - }, - "id": 21, - "links": [], - "options": { - "fieldOptions": { - "calcs": [ - "lastNotNull" - ], - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "id": 0, - "op": "=", - "text": "N/A", - "type": 1, - "value": "null" - } - ], - "max": 100, - "min": 0, - "nullValueMode": "null", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 10 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 25 - } - ] - }, - "unit": "percent" - }, - "overrides": [], - "values": false - }, - "orientation": "horizontal", - "showThresholdLabels": false, - "showThresholdMarkers": true - }, - "pluginVersion": "6.7.3", - "targets": [ - { - "expr": "((node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"}) / (node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} )) * 100", - "intervalFactor": 1, - "refId": "A", - "step": 900 - } - ], - "title": "SWAP Used", - "type": "gauge" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Used Root FS", - "gridPos": { - "h": 4, - "w": 3, - "x": 15, - "y": 1 - }, - "id": 154, - "links": [], - "options": { - "fieldOptions": { - "calcs": [ - "lastNotNull" - ], - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "id": 0, - "op": "=", - "text": "N/A", - "type": 1, - "value": "null" - } - ], - "max": 100, - "min": 0, - "nullValueMode": "null", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "rgba(50, 172, 45, 0.97)", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 80 - }, - { - "color": "rgba(245, 54, 54, 0.9)", - "value": 90 - } - ] - }, - "unit": "percent" - }, - "overrides": [], - "values": false - }, - "orientation": "horizontal", - "showThresholdLabels": false, - "showThresholdMarkers": true - }, - "pluginVersion": "6.7.3", - "targets": [ - { - "expr": "100 - ((node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"} * 100) / node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"})", - "format": "time_series", - "intervalFactor": 1, - "refId": "A", - "step": 900 - } - ], - "title": "Root FS Used", - "type": "gauge" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "${DS_PROMETHEUS}", - "description": "Total number of CPU cores", - "format": "short", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 18, - "y": 1 - }, - "id": 14, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "maxPerRow": 6, - "nullPointMode": "null", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))", - "interval": "", - "intervalFactor": 1, - "legendFormat": "", - "refId": "A", - "step": 900 - } - ], - "thresholds": "", - "title": "CPU Cores", - "type": "singlestat", - "valueFontSize": "50%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "${DS_PROMETHEUS}", - "decimals": 1, - "description": "System uptime", - "format": "s", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 2, - "w": 4, - "x": 20, - "y": 1 - }, - "hideTimeOverride": true, - "id": 15, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "$$hashKey": "object:1094", - "name": "value to text", - "value": 1 - }, - { - "$$hashKey": "object:1095", - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "null", - "nullText": null, - "postfix": "s", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "node_time_seconds{instance=\"$node\",job=\"$job\"} - node_boot_time_seconds{instance=\"$node\",job=\"$job\"}", - "intervalFactor": 2, - "refId": "A", - "step": 1800 - } - ], - "thresholds": "", - "title": "Uptime", - "type": "singlestat", - "valueFontSize": "50%", - "valueMaps": [ - { - "$$hashKey": "object:1097", - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "${DS_PROMETHEUS}", - "decimals": 0, - "description": "Total RootFS", - "format": "bytes", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 18, - "y": 3 - }, - "id": 23, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "maxPerRow": 6, - "nullPointMode": "null", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "refId": "A", - "step": 900 - } - ], - "thresholds": "70,90", - "title": "RootFS Total", - "type": "singlestat", - "valueFontSize": "50%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "${DS_PROMETHEUS}", - "decimals": 0, - "description": "Total RAM", - "format": "bytes", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 20, - "y": 3 - }, - "id": 75, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "maxPerRow": 6, - "nullPointMode": "null", - "nullText": null, - "postfix": "", - "postfixFontSize": "70%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"}", - "intervalFactor": 1, - "refId": "A", - "step": 900 - } - ], - "thresholds": "", - "title": "RAM Total", - "type": "singlestat", - "valueFontSize": "50%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "${DS_PROMETHEUS}", - "decimals": 0, - "description": "Total SWAP", - "format": "bytes", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 22, - "y": 3 - }, - "id": 18, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "maxPerRow": 6, - "nullPointMode": "null", - "nullText": null, - "postfix": "", - "postfixFontSize": "70%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"}", - "intervalFactor": 1, - "refId": "A", - "step": 900 - } - ], - "thresholds": "", - "title": "SWAP Total", - "type": "singlestat", - "valueFontSize": "50%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "collapsed": false, - "datasource": "${DS_PROMETHEUS}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 5 - }, - "id": 263, - "panels": [], - "repeat": null, - "title": "Basic CPU / Mem / Net / Disk", - "type": "row" - }, - { - "aliasColors": { - "Busy": "#EAB839", - "Busy Iowait": "#890F02", - "Busy other": "#1F78C1", - "Idle": "#052B51", - "Idle - Waiting for something to happen": "#052B51", - "guest": "#9AC48A", - "idle": "#052B51", - "iowait": "#EAB839", - "irq": "#BF1B00", - "nice": "#C15C17", - "softirq": "#E24D42", - "steal": "#FCE2DE", - "system": "#508642", - "user": "#5195CE" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 2, - "description": "Basic CPU info", - "fill": 4, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 6 - }, - "hiddenSeries": false, - "id": 77, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": 250, - "sort": null, - "sortDesc": null, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": true, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Busy Iowait", - "color": "#890F02" - }, - { - "alias": "Idle", - "color": "#7EB26D" - }, - { - "alias": "Busy System", - "color": "#EAB839" - }, - { - "alias": "Busy User", - "color": "#0A437C" - }, - { - "alias": "Busy Other", - "color": "#6D1F62" - } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (instance)(irate(node_cpu_seconds_total{mode=\"system\",instance=\"$node\",job=\"$job\"}[5m])) * 100", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Busy System", - "refId": "A", - "step": 240 - }, - { - "expr": "sum by (instance)(irate(node_cpu_seconds_total{mode='user',instance=\"$node\",job=\"$job\"}[5m])) * 100", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Busy User", - "refId": "B", - "step": 240 - }, - { - "expr": "sum by (instance)(irate(node_cpu_seconds_total{mode='iowait',instance=\"$node\",job=\"$job\"}[5m])) * 100", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Busy Iowait", - "refId": "C", - "step": 240 - }, - { - "expr": "sum by (instance)(irate(node_cpu_seconds_total{mode=~\".*irq\",instance=\"$node\",job=\"$job\"}[5m])) * 100", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Busy IRQs", - "refId": "D", - "step": 240 - }, - { - "expr": "sum (irate(node_cpu_seconds_total{mode!='idle',mode!='user',mode!='system',mode!='iowait',mode!='irq',mode!='softirq',instance=\"$node\",job=\"$job\"}[5m])) * 100", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Busy Other", - "refId": "E", - "step": 240 - }, - { - "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='idle',instance=\"$node\",job=\"$job\"}[5m])) * 100", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Idle", - "refId": "F", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "CPU Basic", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "", - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Apps": "#629E51", - "Buffers": "#614D93", - "Cache": "#6D1F62", - "Cached": "#511749", - "Committed": "#508642", - "Free": "#0A437C", - "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", - "Inactive": "#584477", - "PageTables": "#0A50A1", - "Page_Tables": "#0A50A1", - "RAM_Free": "#E0F9D7", - "SWAP Used": "#BF1B00", - "Slab": "#806EB7", - "Slab_Cache": "#E0752D", - "Swap": "#BF1B00", - "Swap Used": "#BF1B00", - "Swap_Cache": "#C15C17", - "Swap_Free": "#2F575E", - "Unused": "#EAB839" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 2, - "description": "Basic memory usage", - "fill": 4, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 6 - }, - "hiddenSeries": false, - "id": 78, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sideWidth": 350, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "RAM Total", - "color": "#E0F9D7", - "fill": 0, - "stack": false - }, - { - "alias": "RAM Cache + Buffer", - "color": "#052B51" - }, - { - "alias": "RAM Free", - "color": "#7EB26D" - }, - { - "alias": "Avaliable", - "color": "#DEDAF7", - "fill": 0, - "stack": false - } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "RAM Total", - "refId": "A", - "step": 240 - }, - { - "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"} - (node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} + node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"})", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "RAM Used", - "refId": "B", - "step": 240 - }, - { - "expr": "node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} + node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "RAM Cache + Buffer", - "refId": "C", - "step": 240 - }, - { - "expr": "node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "RAM Free", - "refId": "D", - "step": 240 - }, - { - "expr": "(node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "SWAP Used", - "refId": "E", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory Basic", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Recv_bytes_eth2": "#7EB26D", - "Recv_bytes_lo": "#0A50A1", - "Recv_drop_eth2": "#6ED0E0", - "Recv_drop_lo": "#E0F9D7", - "Recv_errs_eth2": "#BF1B00", - "Recv_errs_lo": "#CCA300", - "Trans_bytes_eth2": "#7EB26D", - "Trans_bytes_lo": "#0A50A1", - "Trans_drop_eth2": "#6ED0E0", - "Trans_drop_lo": "#E0F9D7", - "Trans_errs_eth2": "#BF1B00", - "Trans_errs_lo": "#CCA300", - "recv_bytes_lo": "#0A50A1", - "recv_drop_eth0": "#99440A", - "recv_drop_lo": "#967302", - "recv_errs_eth0": "#BF1B00", - "recv_errs_lo": "#890F02", - "trans_bytes_eth0": "#7EB26D", - "trans_bytes_lo": "#0A50A1", - "trans_drop_eth0": "#99440A", - "trans_drop_lo": "#967302", - "trans_errs_eth0": "#BF1B00", - "trans_errs_lo": "#890F02" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "Basic network info per interface", - "fill": 4, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 13 - }, - "hiddenSeries": false, - "id": 74, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*trans.*/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_network_receive_bytes_total{instance=\"$node\",job=\"$job\"}[5m])*8", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "recv {{device}}", - "refId": "A", - "step": 240 - }, - { - "expr": "irate(node_network_transmit_bytes_total{instance=\"$node\",job=\"$job\"}[5m])*8", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "trans {{device}} ", - "refId": "B", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Network Traffic Basic", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "pps", - "label": "", - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 3, - "description": "Disk space used of all filesystems mounted", - "fill": 4, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 13 - }, - "height": "", - "hiddenSeries": false, - "id": 152, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "sort": "current", - "sortDesc": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "100 - ((node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'} * 100) / node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{mountpoint}}", - "refId": "A", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Disk Space Used Basic", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percent", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": true, - "datasource": "${DS_PROMETHEUS}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 20 - }, - "id": 265, - "panels": [ - { - "aliasColors": { - "Idle - Waiting for something to happen": "#052B51", - "guest": "#9AC48A", - "idle": "#052B51", - "iowait": "#EAB839", - "irq": "#BF1B00", - "nice": "#C15C17", - "softirq": "#E24D42", - "steal": "#FCE2DE", - "system": "#508642", - "user": "#5195CE" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 2, - "description": "", - "fill": 4, - "fillGradient": 0, - "gridPos": { - "h": 12, - "w": 12, - "x": 0, - "y": 21 - }, - "hiddenSeries": false, - "id": 3, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 250, - "sort": null, - "sortDesc": null, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": true, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode=\"system\",instance=\"$node\",job=\"$job\"}[5m])) * 100", - "format": "time_series", - "interval": "10s", - "intervalFactor": 2, - "legendFormat": "System - Processes executing in kernel mode", - "refId": "A", - "step": 20 - }, - { - "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='user',instance=\"$node\",job=\"$job\"}[5m])) * 100", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "User - Normal processes executing in user mode", - "refId": "B", - "step": 240 - }, - { - "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='nice',instance=\"$node\",job=\"$job\"}[5m])) * 100", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Nice - Niced processes executing in user mode", - "refId": "C", - "step": 240 - }, - { - "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='idle',instance=\"$node\",job=\"$job\"}[5m])) * 100", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Idle - Waiting for something to happen", - "refId": "D", - "step": 240 - }, - { - "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='iowait',instance=\"$node\",job=\"$job\"}[5m])) * 100", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Iowait - Waiting for I/O to complete", - "refId": "E", - "step": 240 - }, - { - "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='irq',instance=\"$node\",job=\"$job\"}[5m])) * 100", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Irq - Servicing interrupts", - "refId": "F", - "step": 240 - }, - { - "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='softirq',instance=\"$node\",job=\"$job\"}[5m])) * 100", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Softirq - Servicing softirqs", - "refId": "G", - "step": 240 - }, - { - "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='steal',instance=\"$node\",job=\"$job\"}[5m])) * 100", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Steal - Time spent in other operating systems when running in a virtualized environment", - "refId": "H", - "step": 240 - }, - { - "expr": "sum by (mode)(irate(node_cpu_seconds_total{mode='guest',instance=\"$node\",job=\"$job\"}[5m])) * 100", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Guest - Time spent running a virtual CPU for a guest operating system", - "refId": "I", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "CPU", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "percentage", - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Apps": "#629E51", - "Buffers": "#614D93", - "Cache": "#6D1F62", - "Cached": "#511749", - "Committed": "#508642", - "Free": "#0A437C", - "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", - "Inactive": "#584477", - "PageTables": "#0A50A1", - "Page_Tables": "#0A50A1", - "RAM_Free": "#E0F9D7", - "Slab": "#806EB7", - "Slab_Cache": "#E0752D", - "Swap": "#BF1B00", - "Swap - Swap memory usage": "#BF1B00", - "Swap_Cache": "#C15C17", - "Swap_Free": "#2F575E", - "Unused": "#EAB839", - "Unused - Free memory unassigned": "#052B51" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 2, - "description": "", - "fill": 4, - "fillGradient": 0, - "gridPos": { - "h": 12, - "w": 12, - "x": 12, - "y": 21 - }, - "hiddenSeries": false, - "id": 24, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 350, - "sort": null, - "sortDesc": null, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*Hardware Corrupted - *./", - "stack": false - } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Slab_bytes{instance=\"$node\",job=\"$job\"} - node_memory_PageTables_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapCached_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Apps - Memory used by user-space applications", - "refId": "A", - "step": 240 - }, - { - "expr": "node_memory_PageTables_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "PageTables - Memory used to map between virtual and physical memory addresses", - "refId": "B", - "step": 240 - }, - { - "expr": "node_memory_SwapCached_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "SwapCache - Memory that keeps track of pages that have been fetched from swap but not yet been modified", - "refId": "C", - "step": 240 - }, - { - "expr": "node_memory_Slab_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Slab - Memory used by the kernel to cache data structures for its own use (caches like inode, dentry, etc)", - "refId": "D", - "step": 240 - }, - { - "expr": "node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Cache - Parked file data (file content) cache", - "refId": "E", - "step": 240 - }, - { - "expr": "node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Buffers - Block device (e.g. harddisk) cache", - "refId": "F", - "step": 240 - }, - { - "expr": "node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Unused - Free memory unassigned", - "refId": "G", - "step": 240 - }, - { - "expr": "(node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"})", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Swap - Swap space used", - "refId": "H", - "step": 240 - }, - { - "expr": "node_memory_HardwareCorrupted_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working", - "refId": "I", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory Stack", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "bytes", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "receive_packets_eth0": "#7EB26D", - "receive_packets_lo": "#E24D42", - "transmit_packets_eth0": "#7EB26D", - "transmit_packets_lo": "#E24D42" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 4, - "fillGradient": 0, - "gridPos": { - "h": 12, - "w": 12, - "x": 0, - "y": 33 - }, - "hiddenSeries": false, - "id": 84, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:5871", - "alias": "/.*Trans.*/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_network_receive_bytes_total{instance=\"$node\",job=\"$job\"}[5m])*8", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{device}} - Receive", - "refId": "A", - "step": 240 - }, - { - "expr": "irate(node_network_transmit_bytes_total{instance=\"$node\",job=\"$job\"}[5m])*8", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{device}} - Transmit", - "refId": "B", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Network Traffic", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:5884", - "format": "bps", - "label": "bits out (-) / in (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:5885", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 3, - "description": "", - "fill": 4, - "fillGradient": 0, - "gridPos": { - "h": 12, - "w": 12, - "x": 12, - "y": 33 - }, - "height": "", - "hiddenSeries": false, - "id": 156, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sort": "current", - "sortDesc": false, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'} - node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{mountpoint}}", - "refId": "A", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Disk Space Used", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "bytes", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 12, - "w": 12, - "x": 0, - "y": 45 - }, - "hiddenSeries": false, - "id": 229, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideZero": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*Read.*/", - "transform": "negative-Y" - }, - { - "alias": "/.*sda_.*/", - "color": "#7EB26D" - }, - { - "alias": "/.*sdb_.*/", - "color": "#EAB839" - }, - { - "alias": "/.*sdc_.*/", - "color": "#6ED0E0" - }, - { - "alias": "/.*sdd_.*/", - "color": "#EF843C" - }, - { - "alias": "/.*sde_.*/", - "color": "#E24D42" - }, - { - "alias": "/.*sda1.*/", - "color": "#584477" - }, - { - "alias": "/.*sda2_.*/", - "color": "#BA43A9" - }, - { - "alias": "/.*sda3_.*/", - "color": "#F4D598" - }, - { - "alias": "/.*sdb1.*/", - "color": "#0A50A1" - }, - { - "alias": "/.*sdb2.*/", - "color": "#BF1B00" - }, - { - "alias": "/.*sdb2.*/", - "color": "#BF1B00" - }, - { - "alias": "/.*sdb3.*/", - "color": "#E0752D" - }, - { - "alias": "/.*sdc1.*/", - "color": "#962D82" - }, - { - "alias": "/.*sdc2.*/", - "color": "#614D93" - }, - { - "alias": "/.*sdc3.*/", - "color": "#9AC48A" - }, - { - "alias": "/.*sdd1.*/", - "color": "#65C5DB" - }, - { - "alias": "/.*sdd2.*/", - "color": "#F9934E" - }, - { - "alias": "/.*sdd3.*/", - "color": "#EA6460" - }, - { - "alias": "/.*sde1.*/", - "color": "#E0F9D7" - }, - { - "alias": "/.*sdd2.*/", - "color": "#FCEACA" - }, - { - "alias": "/.*sde3.*/", - "color": "#F9E2D2" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_disk_reads_completed_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[5m])", - "intervalFactor": 4, - "legendFormat": "{{device}} - Reads completed", - "refId": "A", - "step": 480 - }, - { - "expr": "irate(node_disk_writes_completed_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[5m])", - "intervalFactor": 2, - "legendFormat": "{{device}} - Writes completed", - "refId": "B", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Disk IOps", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "iops", - "label": "IO read (-) / write (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "io time": "#890F02" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 3, - "description": "", - "fill": 4, - "fillGradient": 0, - "gridPos": { - "h": 12, - "w": 12, - "x": 12, - "y": 45 - }, - "hiddenSeries": false, - "id": 42, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sort": null, - "sortDesc": null, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*read*./", - "transform": "negative-Y" - }, - { - "alias": "/.*sda.*/", - "color": "#7EB26D" - }, - { - "alias": "/.*sdb.*/", - "color": "#EAB839" - }, - { - "alias": "/.*sdc.*/", - "color": "#6ED0E0" - }, - { - "alias": "/.*sdd.*/", - "color": "#EF843C" - }, - { - "alias": "/.*sde.*/", - "color": "#E24D42" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_disk_read_bytes_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[5m])", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{device}} - Successfully read bytes", - "refId": "A", - "step": 240 - }, - { - "expr": "irate(node_disk_written_bytes_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[5m])", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{device}} - Successfully written bytes", - "refId": "B", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "I/O Usage Read / Write", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": false, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "bytes read (-) / write (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "ms", - "label": "", - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "io time": "#890F02" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 3, - "description": "", - "fill": 4, - "fillGradient": 0, - "gridPos": { - "h": 12, - "w": 12, - "x": 0, - "y": 57 - }, - "hiddenSeries": false, - "id": 127, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sort": null, - "sortDesc": null, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_disk_io_time_seconds_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"} [5m])", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{device}} - Time spent doing I/Os", - "refId": "A", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "I/O Usage Times", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": false, - "values": [] - }, - "yaxes": [ - { - "format": "s", - "label": "time", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "s", - "label": "", - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "repeat": null, - "title": "CPU / Memory / Net / Disk", - "type": "row" - }, - { - "collapsed": true, - "datasource": "${DS_PROMETHEUS}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 21 - }, - "id": 266, - "panels": [ - { - "aliasColors": { - "Apps": "#629E51", - "Buffers": "#614D93", - "Cache": "#6D1F62", - "Cached": "#511749", - "Committed": "#508642", - "Free": "#0A437C", - "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", - "Inactive": "#584477", - "PageTables": "#0A50A1", - "Page_Tables": "#0A50A1", - "RAM_Free": "#E0F9D7", - "Slab": "#806EB7", - "Slab_Cache": "#E0752D", - "Swap": "#BF1B00", - "Swap_Cache": "#C15C17", - "Swap_Free": "#2F575E", - "Unused": "#EAB839" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 2, - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 70 - }, - "hiddenSeries": false, - "id": 136, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 350, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 2, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "node_memory_Inactive_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Inactive - Memory which has been less recently used. It is more eligible to be reclaimed for other purposes", - "refId": "A", - "step": 4 - }, - { - "expr": "node_memory_Active_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Active - Memory that has been used more recently and usually not reclaimed unless absolutely necessary", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory Active / Inactive", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "bytes", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Apps": "#629E51", - "Buffers": "#614D93", - "Cache": "#6D1F62", - "Cached": "#511749", - "Committed": "#508642", - "Free": "#0A437C", - "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", - "Inactive": "#584477", - "PageTables": "#0A50A1", - "Page_Tables": "#0A50A1", - "RAM_Free": "#E0F9D7", - "Slab": "#806EB7", - "Slab_Cache": "#E0752D", - "Swap": "#BF1B00", - "Swap_Cache": "#C15C17", - "Swap_Free": "#2F575E", - "Unused": "#EAB839" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 2, - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 70 - }, - "hiddenSeries": false, - "id": 135, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 350, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*Committed_AS - *./" - }, - { - "alias": "/.*CommitLimit - *./", - "color": "#BF1B00", - "fill": 0 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_memory_Committed_AS_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Committed_AS - Amount of memory presently allocated on the system", - "refId": "A", - "step": 4 - }, - { - "expr": "node_memory_CommitLimit_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "CommitLimit - Amount of memory currently available to be allocated on the system", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory Commited", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "bytes", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Apps": "#629E51", - "Buffers": "#614D93", - "Cache": "#6D1F62", - "Cached": "#511749", - "Committed": "#508642", - "Free": "#0A437C", - "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", - "Inactive": "#584477", - "PageTables": "#0A50A1", - "Page_Tables": "#0A50A1", - "RAM_Free": "#E0F9D7", - "Slab": "#806EB7", - "Slab_Cache": "#E0752D", - "Swap": "#BF1B00", - "Swap_Cache": "#C15C17", - "Swap_Free": "#2F575E", - "Unused": "#EAB839" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 2, - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 80 - }, - "hiddenSeries": false, - "id": 191, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 350, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "node_memory_Inactive_file_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Inactive_file - File-backed memory on inactive LRU list", - "refId": "A", - "step": 4 - }, - { - "expr": "node_memory_Inactive_anon_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Inactive_anon - Anonymous and swap cache on inactive LRU list, including tmpfs (shmem)", - "refId": "B", - "step": 4 - }, - { - "expr": "node_memory_Active_file_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Active_file - File-backed memory on active LRU list", - "refId": "C", - "step": 4 - }, - { - "expr": "node_memory_Active_anon_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Active_anon - Anonymous and swap cache on active least-recently-used (LRU) list, including tmpfs", - "refId": "D", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory Active / Inactive Detail", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "bytes", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Active": "#99440A", - "Buffers": "#58140C", - "Cache": "#6D1F62", - "Cached": "#511749", - "Committed": "#508642", - "Dirty": "#6ED0E0", - "Free": "#B7DBAB", - "Inactive": "#EA6460", - "Mapped": "#052B51", - "PageTables": "#0A50A1", - "Page_Tables": "#0A50A1", - "Slab_Cache": "#EAB839", - "Swap": "#BF1B00", - "Swap_Cache": "#C15C17", - "Total": "#511749", - "Total RAM": "#052B51", - "Total RAM + Swap": "#052B51", - "Total Swap": "#614D93", - "VmallocUsed": "#EA6460" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 2, - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 80 - }, - "hiddenSeries": false, - "id": 130, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 2, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_memory_Writeback_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Writeback - Memory which is actively being written back to disk", - "refId": "A", - "step": 4 - }, - { - "expr": "node_memory_WritebackTmp_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "WritebackTmp - Memory used by FUSE for temporary writeback buffers", - "refId": "B", - "step": 4 - }, - { - "expr": "node_memory_Dirty_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Dirty - Memory which is waiting to get written back to the disk", - "refId": "C", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory Writeback and Dirty", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "bytes", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Apps": "#629E51", - "Buffers": "#614D93", - "Cache": "#6D1F62", - "Cached": "#511749", - "Committed": "#508642", - "Free": "#0A437C", - "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", - "Inactive": "#584477", - "PageTables": "#0A50A1", - "Page_Tables": "#0A50A1", - "RAM_Free": "#E0F9D7", - "Slab": "#806EB7", - "Slab_Cache": "#E0752D", - "Swap": "#BF1B00", - "Swap_Cache": "#C15C17", - "Swap_Free": "#2F575E", - "Unused": "#EAB839" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 2, - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 90 - }, - "hiddenSeries": false, - "id": 138, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 350, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:4131", - "alias": "ShmemHugePages - Memory used by shared memory (shmem) and tmpfs allocated with huge pages", - "fill": 0 - }, - { - "$$hashKey": "object:4138", - "alias": "ShmemHugePages - Memory used by shared memory (shmem) and tmpfs allocated with huge pages", - "fill": 0 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_memory_Mapped_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Mapped - Used memory in mapped pages files which have been mmaped, such as libraries", - "refId": "A", - "step": 4 - }, - { - "expr": "node_memory_Shmem_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Shmem - Used shared memory (shared between several processes, thus including RAM disks)", - "refId": "B", - "step": 4 - }, - { - "expr": "node_memory_ShmemHugePages_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "ShmemHugePages - Memory used by shared memory (shmem) and tmpfs allocated with huge pages", - "refId": "C", - "step": 4 - }, - { - "expr": "node_memory_ShmemPmdMapped_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "ShmemPmdMapped - Ammount of shared (shmem/tmpfs) memory backed by huge pages", - "refId": "D", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory Shared and Mapped", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:4106", - "format": "bytes", - "label": "bytes", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:4107", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Active": "#99440A", - "Buffers": "#58140C", - "Cache": "#6D1F62", - "Cached": "#511749", - "Committed": "#508642", - "Dirty": "#6ED0E0", - "Free": "#B7DBAB", - "Inactive": "#EA6460", - "Mapped": "#052B51", - "PageTables": "#0A50A1", - "Page_Tables": "#0A50A1", - "Slab_Cache": "#EAB839", - "Swap": "#BF1B00", - "Swap_Cache": "#C15C17", - "Total": "#511749", - "Total RAM": "#052B51", - "Total RAM + Swap": "#052B51", - "Total Swap": "#614D93", - "VmallocUsed": "#EA6460" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 2, - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 90 - }, - "hiddenSeries": false, - "id": 131, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 2, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "node_memory_SUnreclaim_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "SUnreclaim - Part of Slab, that cannot be reclaimed on memory pressure", - "refId": "A", - "step": 4 - }, - { - "expr": "node_memory_SReclaimable_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "SReclaimable - Part of Slab, that might be reclaimed, such as caches", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory Slab", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "bytes", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Active": "#99440A", - "Buffers": "#58140C", - "Cache": "#6D1F62", - "Cached": "#511749", - "Committed": "#508642", - "Dirty": "#6ED0E0", - "Free": "#B7DBAB", - "Inactive": "#EA6460", - "Mapped": "#052B51", - "PageTables": "#0A50A1", - "Page_Tables": "#0A50A1", - "Slab_Cache": "#EAB839", - "Swap": "#BF1B00", - "Swap_Cache": "#C15C17", - "Total": "#511749", - "Total RAM": "#052B51", - "Total RAM + Swap": "#052B51", - "VmallocUsed": "#EA6460" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 2, - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 100 - }, - "hiddenSeries": false, - "id": 70, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_memory_VmallocChunk_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "VmallocChunk - Largest contigious block of vmalloc area which is free", - "refId": "A", - "step": 4 - }, - { - "expr": "node_memory_VmallocTotal_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "VmallocTotal - Total size of vmalloc memory area", - "refId": "B", - "step": 4 - }, - { - "expr": "node_memory_VmallocUsed_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "VmallocUsed - Amount of vmalloc area which is used", - "refId": "C", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory Vmalloc", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "bytes", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Apps": "#629E51", - "Buffers": "#614D93", - "Cache": "#6D1F62", - "Cached": "#511749", - "Committed": "#508642", - "Free": "#0A437C", - "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", - "Inactive": "#584477", - "PageTables": "#0A50A1", - "Page_Tables": "#0A50A1", - "RAM_Free": "#E0F9D7", - "Slab": "#806EB7", - "Slab_Cache": "#E0752D", - "Swap": "#BF1B00", - "Swap_Cache": "#C15C17", - "Swap_Free": "#2F575E", - "Unused": "#EAB839" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 2, - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 100 - }, - "hiddenSeries": false, - "id": 159, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 350, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_memory_Bounce_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Bounce - Memory used for block device bounce buffers", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory Bounce", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "bytes", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Active": "#99440A", - "Buffers": "#58140C", - "Cache": "#6D1F62", - "Cached": "#511749", - "Committed": "#508642", - "Dirty": "#6ED0E0", - "Free": "#B7DBAB", - "Inactive": "#EA6460", - "Mapped": "#052B51", - "PageTables": "#0A50A1", - "Page_Tables": "#0A50A1", - "Slab_Cache": "#EAB839", - "Swap": "#BF1B00", - "Swap_Cache": "#C15C17", - "Total": "#511749", - "Total RAM": "#052B51", - "Total RAM + Swap": "#052B51", - "VmallocUsed": "#EA6460" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 2, - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 110 - }, - "hiddenSeries": false, - "id": 129, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*Inactive *./", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_memory_AnonHugePages_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "AnonHugePages - Memory in anonymous huge pages", - "refId": "A", - "step": 4 - }, - { - "expr": "node_memory_AnonPages_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "AnonPages - Memory in user pages not backed by files", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory Anonymous", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "bytes", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Apps": "#629E51", - "Buffers": "#614D93", - "Cache": "#6D1F62", - "Cached": "#511749", - "Committed": "#508642", - "Free": "#0A437C", - "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", - "Inactive": "#584477", - "PageTables": "#0A50A1", - "Page_Tables": "#0A50A1", - "RAM_Free": "#E0F9D7", - "Slab": "#806EB7", - "Slab_Cache": "#E0752D", - "Swap": "#BF1B00", - "Swap_Cache": "#C15C17", - "Swap_Free": "#2F575E", - "Unused": "#EAB839" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 2, - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 110 - }, - "hiddenSeries": false, - "id": 160, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 350, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 2, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_memory_KernelStack_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "KernelStack - Kernel memory stack. This is not reclaimable", - "refId": "A", - "step": 4 - }, - { - "expr": "node_memory_Percpu_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "PerCPU - Per CPU memory allocated dynamically by loadable modules", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory Kernel / CPU", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "bytes", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Active": "#99440A", - "Buffers": "#58140C", - "Cache": "#6D1F62", - "Cached": "#511749", - "Committed": "#508642", - "Dirty": "#6ED0E0", - "Free": "#B7DBAB", - "Inactive": "#EA6460", - "Mapped": "#052B51", - "PageTables": "#0A50A1", - "Page_Tables": "#0A50A1", - "Slab_Cache": "#EAB839", - "Swap": "#BF1B00", - "Swap_Cache": "#C15C17", - "Total": "#511749", - "Total RAM": "#806EB7", - "Total RAM + Swap": "#806EB7", - "VmallocUsed": "#EA6460" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 2, - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 120 - }, - "hiddenSeries": false, - "id": 140, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_memory_HugePages_Free{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "HugePages_Free - Huge pages in the pool that are not yet allocated", - "refId": "A", - "step": 4 - }, - { - "expr": "node_memory_HugePages_Rsvd{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "HugePages_Rsvd - Huge pages for which a commitment to allocate from the pool has been made, but no allocation has yet been made", - "refId": "B", - "step": 4 - }, - { - "expr": "node_memory_HugePages_Surp{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "HugePages_Surp - Huge pages in the pool above the value in /proc/sys/vm/nr_hugepages", - "refId": "C", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory HugePages Counter", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "pages", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": "", - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Active": "#99440A", - "Buffers": "#58140C", - "Cache": "#6D1F62", - "Cached": "#511749", - "Committed": "#508642", - "Dirty": "#6ED0E0", - "Free": "#B7DBAB", - "Inactive": "#EA6460", - "Mapped": "#052B51", - "PageTables": "#0A50A1", - "Page_Tables": "#0A50A1", - "Slab_Cache": "#EAB839", - "Swap": "#BF1B00", - "Swap_Cache": "#C15C17", - "Total": "#511749", - "Total RAM": "#806EB7", - "Total RAM + Swap": "#806EB7", - "VmallocUsed": "#EA6460" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 2, - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 120 - }, - "hiddenSeries": false, - "id": 71, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 2, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_memory_HugePages_Total{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "HugePages - Total size of the pool of huge pages", - "refId": "A", - "step": 4 - }, - { - "expr": "node_memory_Hugepagesize_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Hugepagesize - Huge Page size", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory HugePages Size", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "bytes", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": "", - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Active": "#99440A", - "Buffers": "#58140C", - "Cache": "#6D1F62", - "Cached": "#511749", - "Committed": "#508642", - "Dirty": "#6ED0E0", - "Free": "#B7DBAB", - "Inactive": "#EA6460", - "Mapped": "#052B51", - "PageTables": "#0A50A1", - "Page_Tables": "#0A50A1", - "Slab_Cache": "#EAB839", - "Swap": "#BF1B00", - "Swap_Cache": "#C15C17", - "Total": "#511749", - "Total RAM": "#052B51", - "Total RAM + Swap": "#052B51", - "VmallocUsed": "#EA6460" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 2, - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 130 - }, - "hiddenSeries": false, - "id": 128, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_memory_DirectMap1G_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "DirectMap1G - Amount of pages mapped as this size", - "refId": "A", - "step": 4 - }, - { - "expr": "node_memory_DirectMap2M_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "DirectMap2M - Amount of pages mapped as this size", - "refId": "B", - "step": 4 - }, - { - "expr": "node_memory_DirectMap4k_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "DirectMap4K - Amount of pages mapped as this size", - "refId": "C", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory DirectMap", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "bytes", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Apps": "#629E51", - "Buffers": "#614D93", - "Cache": "#6D1F62", - "Cached": "#511749", - "Committed": "#508642", - "Free": "#0A437C", - "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", - "Inactive": "#584477", - "PageTables": "#0A50A1", - "Page_Tables": "#0A50A1", - "RAM_Free": "#E0F9D7", - "Slab": "#806EB7", - "Slab_Cache": "#E0752D", - "Swap": "#BF1B00", - "Swap_Cache": "#C15C17", - "Swap_Free": "#2F575E", - "Unused": "#EAB839" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 2, - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 130 - }, - "hiddenSeries": false, - "id": 137, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 350, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_memory_Unevictable_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Unevictable - Amount of unevictable memory that can't be swapped out for a variety of reasons", - "refId": "A", - "step": 4 - }, - { - "expr": "node_memory_Mlocked_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "MLocked - Size of pages locked to memory using the mlock() system call", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory Unevictable and MLocked", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "bytes", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Active": "#99440A", - "Buffers": "#58140C", - "Cache": "#6D1F62", - "Cached": "#511749", - "Committed": "#508642", - "Dirty": "#6ED0E0", - "Free": "#B7DBAB", - "Inactive": "#EA6460", - "Mapped": "#052B51", - "PageTables": "#0A50A1", - "Page_Tables": "#0A50A1", - "Slab_Cache": "#EAB839", - "Swap": "#BF1B00", - "Swap_Cache": "#C15C17", - "Total": "#511749", - "Total RAM": "#052B51", - "Total RAM + Swap": "#052B51", - "Total Swap": "#614D93", - "VmallocUsed": "#EA6460" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 2, - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 140 - }, - "hiddenSeries": false, - "id": 132, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_memory_NFS_Unstable_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "NFS Unstable - Memory in NFS pages sent to the server, but not yet commited to the storage", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory NFS", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "bytes", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "repeat": null, - "title": "Memory Meminfo", - "type": "row" - }, - { - "collapsed": true, - "datasource": "${DS_PROMETHEUS}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 22 - }, - "id": 267, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 23 - }, - "hiddenSeries": false, - "id": 176, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*out/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_vmstat_pgpgin{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Pagesin - Page in operations", - "refId": "A", - "step": 4 - }, - { - "expr": "irate(node_vmstat_pgpgout{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Pagesout - Page out operations", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory Pages In / Out", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "pages out (-) / in (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 23 - }, - "hiddenSeries": false, - "id": 22, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*out/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_vmstat_pswpin{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Pswpin - Pages swapped in", - "refId": "A", - "step": 4 - }, - { - "expr": "irate(node_vmstat_pswpout{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Pswpout - Pages swapped out", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory Pages Swap In / Out", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "pages out (-) / in (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Apps": "#629E51", - "Buffers": "#614D93", - "Cache": "#6D1F62", - "Cached": "#511749", - "Committed": "#508642", - "Free": "#0A437C", - "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", - "Inactive": "#584477", - "PageTables": "#0A50A1", - "Page_Tables": "#0A50A1", - "RAM_Free": "#E0F9D7", - "Slab": "#806EB7", - "Slab_Cache": "#E0752D", - "Swap": "#BF1B00", - "Swap_Cache": "#C15C17", - "Swap_Free": "#2F575E", - "Unused": "#EAB839" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 2, - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 33 - }, - "hiddenSeries": false, - "id": 175, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 350, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:6118", - "alias": "Pgfault - Page major and minor fault operations", - "fill": 0, - "stack": false - } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_vmstat_pgfault{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Pgfault - Page major and minor fault operations", - "refId": "A", - "step": 4 - }, - { - "expr": "irate(node_vmstat_pgmajfault{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Pgmajfault - Major page fault operations", - "refId": "B", - "step": 4 - }, - { - "expr": "irate(node_vmstat_pgfault{instance=\"$node\",job=\"$job\"}[5m]) - irate(node_vmstat_pgmajfault{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Pgminfault - Minor page fault operations", - "refId": "C", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory Page Faults", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:6133", - "format": "short", - "label": "faults", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:6134", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "Active": "#99440A", - "Buffers": "#58140C", - "Cache": "#6D1F62", - "Cached": "#511749", - "Committed": "#508642", - "Dirty": "#6ED0E0", - "Free": "#B7DBAB", - "Inactive": "#EA6460", - "Mapped": "#052B51", - "PageTables": "#0A50A1", - "Page_Tables": "#0A50A1", - "Slab_Cache": "#EAB839", - "Swap": "#BF1B00", - "Swap_Cache": "#C15C17", - "Total": "#511749", - "Total RAM": "#052B51", - "Total RAM + Swap": "#052B51", - "Total Swap": "#614D93", - "VmallocUsed": "#EA6460" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 2, - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 33 - }, - "hiddenSeries": false, - "id": 307, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": null, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_vmstat_oom_kill{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "oom killer invocations ", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "OOM Killer", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:5373", - "format": "short", - "label": "counter", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:5374", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "repeat": null, - "title": "Memory Vmstat", - "type": "row" - }, - { - "collapsed": true, - "datasource": "${DS_PROMETHEUS}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 23 - }, - "id": 293, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 24 - }, - "hiddenSeries": false, - "id": 260, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*Variation*./", - "color": "#890F02" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_timex_estimated_error_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "Estimated error in seconds", - "refId": "A", - "step": 240 - }, - { - "expr": "node_timex_offset_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "Time offset in between local system and reference clock", - "refId": "B", - "step": 240 - }, - { - "expr": "node_timex_maxerror_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "Maximum error in seconds", - "refId": "C", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Time Syncronized Drift", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "s", - "label": "seconds", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": "counter", - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 24 - }, - "hiddenSeries": false, - "id": 291, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_timex_loop_time_constant{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Phase-locked loop time adjust", - "refId": "A", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Time PLL Adjust", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "counter", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 34 - }, - "hiddenSeries": false, - "id": 168, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*Variation*./", - "color": "#890F02" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_timex_sync_status{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Is clock synchronized to a reliable server (1 = yes, 0 = no)", - "refId": "A", - "step": 240 - }, - { - "expr": "node_timex_frequency_adjustment_ratio{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Local clock frequency adjustment", - "refId": "B", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Time Syncronized Status", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "counter", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 34 - }, - "hiddenSeries": false, - "id": 294, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_timex_tick_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Seconds between clock ticks", - "refId": "A", - "step": 240 - }, - { - "expr": "node_timex_tai_offset_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "International Atomic Time (TAI) offset", - "refId": "B", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Time Misc", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "s", - "label": "seconds", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "title": "System Timesync", - "type": "row" - }, - { - "collapsed": true, - "datasource": "${DS_PROMETHEUS}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 24 - }, - "id": 312, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 7 - }, - "hiddenSeries": false, - "id": 62, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_procs_blocked{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Processes blocked waiting for I/O to complete", - "refId": "A", - "step": 240 - }, - { - "expr": "node_procs_running{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Processes in runnable state", - "refId": "B", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Processes Status", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:6500", - "format": "short", - "label": "counter", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:6501", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 7 - }, - "hiddenSeries": false, - "id": 315, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "node_processes_state{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ state }}", - "refId": "A", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Processes State", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:6500", - "format": "short", - "label": "counter", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:6501", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 17 - }, - "hiddenSeries": false, - "id": 148, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_forks_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Processes forks second", - "refId": "A", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Processes Forks", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:6640", - "format": "short", - "label": "forks / sec", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:6641", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 17 - }, - "hiddenSeries": false, - "id": 149, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*Max.*/", - "fill": 0 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(process_virtual_memory_bytes{instance=\"$node\",job=\"$job\"}[5m])", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "Processes virtual memory size in bytes", - "refId": "A", - "step": 240 - }, - { - "expr": "process_resident_memory_max_bytes{instance=\"$node\",job=\"$job\"}", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "Maximum amount of virtual memory available in bytes", - "refId": "B", - "step": 240 - }, - { - "expr": "irate(process_virtual_memory_bytes{instance=\"$node\",job=\"$job\"}[5m])", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "Processes virtual memory size in bytes", - "refId": "C", - "step": 240 - }, - { - "expr": "irate(process_virtual_memory_max_bytes{instance=\"$node\",job=\"$job\"}[5m])", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "Maximum amount of virtual memory available in bytes", - "refId": "D", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Processes Memory", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "decbytes", - "label": "bytes", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 27 - }, - "hiddenSeries": false, - "id": 313, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:709", - "alias": "PIDs limit", - "color": "#F2495C", - "fill": 0 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_processes_pids{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Number of PIDs", - "refId": "A", - "step": 240 - }, - { - "expr": "node_processes_max_processes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "PIDs limit", - "refId": "B", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "PIDs Number and Limit", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:6500", - "format": "short", - "label": "counter", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:6501", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 27 - }, - "hiddenSeries": false, - "id": 305, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:4963", - "alias": "/.*waiting.*/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_schedstat_running_seconds_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "CPU {{ cpu }} - seconds spent running a process", - "refId": "A", - "step": 240 - }, - { - "expr": "irate(node_schedstat_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "CPU {{ cpu }} - seconds spent by processing waiting for this CPU", - "refId": "B", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Process schedule stats Running / Waiting", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:4860", - "format": "s", - "label": "seconds", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:4861", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 37 - }, - "hiddenSeries": false, - "id": 314, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:709", - "alias": "Threads limit", - "color": "#F2495C", - "fill": 0 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_processes_threads{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Allocated threads", - "refId": "A", - "step": 240 - }, - { - "expr": "node_processes_max_threads{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Threads limit", - "refId": "B", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Threads Number and Limit", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:6500", - "format": "short", - "label": "counter", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:6501", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "title": "System Processes", - "type": "row" - }, - { - "collapsed": true, - "datasource": "${DS_PROMETHEUS}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 25 - }, - "id": 269, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 8 - }, - "hiddenSeries": false, - "id": 8, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_context_switches_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Context switches", - "refId": "A", - "step": 240 - }, - { - "expr": "irate(node_intr_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Interrupts", - "refId": "B", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Context Switches / Interrupts", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "counter", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 8 - }, - "hiddenSeries": false, - "id": 7, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_load1{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 4, - "legendFormat": "Load 1m", - "refId": "A", - "step": 480 - }, - { - "expr": "node_load5{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 4, - "legendFormat": "Load 5m", - "refId": "B", - "step": 480 - }, - { - "expr": "node_load15{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 4, - "legendFormat": "Load 15m", - "refId": "C", - "step": 480 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "System Load", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:6261", - "format": "short", - "label": "counter", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:6262", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 18 - }, - "hiddenSeries": false, - "id": 259, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*Critical*./", - "color": "#E24D42", - "fill": 0 - }, - { - "alias": "/.*Max*./", - "color": "#EF843C", - "fill": 0 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_interrupts_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ type }} - {{ info }}", - "refId": "A", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Interrupts Detail", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "counter", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 18 - }, - "hiddenSeries": false, - "id": 306, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_schedstat_timeslices_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "CPU {{ cpu }}", - "refId": "A", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Schedule timeslices executed by each cpu", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:4860", - "format": "short", - "label": "counter", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:4861", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 28 - }, - "hiddenSeries": false, - "id": 151, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_entropy_available_bits{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Entropy available to random number generators", - "refId": "A", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Entropy", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:6568", - "format": "short", - "label": "counter", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:6569", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 28 - }, - "hiddenSeries": false, - "id": 308, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(process_cpu_seconds_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Time spent", - "refId": "A", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "CPU time spent in user and system contexts", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:4860", - "format": "s", - "label": "seconds", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:4861", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 38 - }, - "hiddenSeries": false, - "id": 64, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:6323", - "alias": "/.*Max*./", - "color": "#890F02", - "fill": 0 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "process_max_fds{instance=\"$node\",job=\"$job\"}", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Maximum open file descriptors", - "refId": "A", - "step": 240 - }, - { - "expr": "process_open_fds{instance=\"$node\",job=\"$job\"}", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Open file descriptors", - "refId": "B", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "File Descriptors", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:6338", - "format": "short", - "label": "counter", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:6339", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "repeat": null, - "title": "System Misc", - "type": "row" - }, - { - "collapsed": true, - "datasource": "${DS_PROMETHEUS}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 26 - }, - "id": 304, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 26 - }, - "hiddenSeries": false, - "id": 158, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:6726", - "alias": "/.*Critical*./", - "color": "#E24D42", - "fill": 0 - }, - { - "$$hashKey": "object:6727", - "alias": "/.*Max*./", - "color": "#EF843C", - "fill": 0 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_hwmon_temp_celsius{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ chip }} {{ sensor }} temp", - "refId": "A", - "step": 240 - }, - { - "expr": "node_hwmon_temp_crit_alarm_celsius{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ chip }} {{ sensor }} Critical Alarm", - "refId": "B", - "step": 240 - }, - { - "expr": "node_hwmon_temp_crit_celsius{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ chip }} {{ sensor }} Critical", - "refId": "C", - "step": 240 - }, - { - "expr": "node_hwmon_temp_crit_hyst_celsius{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ chip }} {{ sensor }} Critical Historical", - "refId": "D", - "step": 240 - }, - { - "expr": "node_hwmon_temp_max_celsius{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ chip }} {{ sensor }} Max", - "refId": "E", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Hardware temperature monitor", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:6750", - "format": "celsius", - "label": "temperature", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:6751", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 26 - }, - "hiddenSeries": false, - "id": 300, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:1655", - "alias": "/.*Max*./", - "color": "#EF843C", - "fill": 0 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_cooling_device_cur_state{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "Current {{ name }} in {{ type }}", - "refId": "A", - "step": 240 - }, - { - "expr": "node_cooling_device_max_state{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Max {{ name }} in {{ type }}", - "refId": "B", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Throttle cooling device", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:1678", - "format": "short", - "label": "counter", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:1679", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 36 - }, - "hiddenSeries": false, - "id": 302, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_power_supply_online{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ power_supply }} online", - "refId": "A", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Power supply", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:1678", - "format": "short", - "label": "counter", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:1679", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "title": "Hardware Misc", - "type": "row" - }, - { - "collapsed": true, - "datasource": "${DS_PROMETHEUS}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 27 - }, - "id": 296, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 10 - }, - "hiddenSeries": false, - "id": 297, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_systemd_socket_accepted_connections_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ name }} Connections", - "refId": "A", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Systemd Sockets", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "counter", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 10 - }, - "hiddenSeries": false, - "id": 298, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Failed", - "color": "#F2495C" - }, - { - "alias": "Inactive", - "color": "#FF9830" - }, - { - "alias": "Active", - "color": "#73BF69" - }, - { - "alias": "Deactivating", - "color": "#FFCB7D" - }, - { - "alias": "Activating", - "color": "#C8F2C2" - } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"activating\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Activating", - "refId": "A", - "step": 240 - }, - { - "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"active\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Active", - "refId": "B", - "step": 240 - }, - { - "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"deactivating\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Deactivating", - "refId": "C", - "step": 240 - }, - { - "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"failed\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Failed", - "refId": "D", - "step": 240 - }, - { - "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"inactive\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Inactive", - "refId": "E", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Systemd Units State", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "counter", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "title": "Systemd", - "type": "row" - }, - { - "collapsed": true, - "datasource": "${DS_PROMETHEUS}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 28 - }, - "id": 270, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 29 - }, - "hiddenSeries": false, - "id": 9, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideZero": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - { - "$$hashKey": "object:2033", - "alias": "/.*Read.*/", - "transform": "negative-Y" - }, - { - "$$hashKey": "object:2034", - "alias": "/.*sda_.*/", - "color": "#7EB26D" - }, - { - "$$hashKey": "object:2035", - "alias": "/.*sdb_.*/", - "color": "#EAB839" - }, - { - "$$hashKey": "object:2036", - "alias": "/.*sdc_.*/", - "color": "#6ED0E0" - }, - { - "$$hashKey": "object:2037", - "alias": "/.*sdd_.*/", - "color": "#EF843C" - }, - { - "$$hashKey": "object:2038", - "alias": "/.*sde_.*/", - "color": "#E24D42" - }, - { - "$$hashKey": "object:2039", - "alias": "/.*sda1.*/", - "color": "#584477" - }, - { - "$$hashKey": "object:2040", - "alias": "/.*sda2_.*/", - "color": "#BA43A9" - }, - { - "$$hashKey": "object:2041", - "alias": "/.*sda3_.*/", - "color": "#F4D598" - }, - { - "$$hashKey": "object:2042", - "alias": "/.*sdb1.*/", - "color": "#0A50A1" - }, - { - "$$hashKey": "object:2043", - "alias": "/.*sdb2.*/", - "color": "#BF1B00" - }, - { - "$$hashKey": "object:2044", - "alias": "/.*sdb3.*/", - "color": "#E0752D" - }, - { - "$$hashKey": "object:2045", - "alias": "/.*sdc1.*/", - "color": "#962D82" - }, - { - "$$hashKey": "object:2046", - "alias": "/.*sdc2.*/", - "color": "#614D93" - }, - { - "$$hashKey": "object:2047", - "alias": "/.*sdc3.*/", - "color": "#9AC48A" - }, - { - "$$hashKey": "object:2048", - "alias": "/.*sdd1.*/", - "color": "#65C5DB" - }, - { - "$$hashKey": "object:2049", - "alias": "/.*sdd2.*/", - "color": "#F9934E" - }, - { - "$$hashKey": "object:2050", - "alias": "/.*sdd3.*/", - "color": "#EA6460" - }, - { - "$$hashKey": "object:2051", - "alias": "/.*sde1.*/", - "color": "#E0F9D7" - }, - { - "$$hashKey": "object:2052", - "alias": "/.*sdd2.*/", - "color": "#FCEACA" - }, - { - "$$hashKey": "object:2053", - "alias": "/.*sde3.*/", - "color": "#F9E2D2" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_disk_reads_completed_total{instance=\"$node\",job=\"$job\"}[5m])", - "intervalFactor": 4, - "legendFormat": "{{device}} - Reads completed", - "refId": "A", - "step": 8 - }, - { - "expr": "irate(node_disk_writes_completed_total{instance=\"$node\",job=\"$job\"}[5m])", - "intervalFactor": 2, - "legendFormat": "{{device}} - Writes completed", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Disk IOps Completed", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:2186", - "format": "iops", - "label": "IO read (-) / write (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:2187", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 29 - }, - "hiddenSeries": false, - "id": 33, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideZero": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*Read.*/", - "transform": "negative-Y" - }, - { - "alias": "/.*sda_.*/", - "color": "#7EB26D" - }, - { - "alias": "/.*sdb_.*/", - "color": "#EAB839" - }, - { - "alias": "/.*sdc_.*/", - "color": "#6ED0E0" - }, - { - "alias": "/.*sdd_.*/", - "color": "#EF843C" - }, - { - "alias": "/.*sde_.*/", - "color": "#E24D42" - }, - { - "alias": "/.*sda1.*/", - "color": "#584477" - }, - { - "alias": "/.*sda2_.*/", - "color": "#BA43A9" - }, - { - "alias": "/.*sda3_.*/", - "color": "#F4D598" - }, - { - "alias": "/.*sdb1.*/", - "color": "#0A50A1" - }, - { - "alias": "/.*sdb2.*/", - "color": "#BF1B00" - }, - { - "alias": "/.*sdb3.*/", - "color": "#E0752D" - }, - { - "alias": "/.*sdc1.*/", - "color": "#962D82" - }, - { - "alias": "/.*sdc2.*/", - "color": "#614D93" - }, - { - "alias": "/.*sdc3.*/", - "color": "#9AC48A" - }, - { - "alias": "/.*sdd1.*/", - "color": "#65C5DB" - }, - { - "alias": "/.*sdd2.*/", - "color": "#F9934E" - }, - { - "alias": "/.*sdd3.*/", - "color": "#EA6460" - }, - { - "alias": "/.*sde1.*/", - "color": "#E0F9D7" - }, - { - "alias": "/.*sdd2.*/", - "color": "#FCEACA" - }, - { - "alias": "/.*sde3.*/", - "color": "#F9E2D2" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_disk_read_bytes_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "intervalFactor": 4, - "legendFormat": "{{device}} - Read bytes", - "refId": "A", - "step": 8 - }, - { - "expr": "irate(node_disk_written_bytes_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{device}} - Written bytes", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Disk R/W Data", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "Bps", - "label": "bytes read (-) / write (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "", - "fill": 3, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 39 - }, - "hiddenSeries": false, - "id": 37, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideZero": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*Read.*/", - "transform": "negative-Y" - }, - { - "alias": "/.*sda_.*/", - "color": "#7EB26D" - }, - { - "alias": "/.*sdb_.*/", - "color": "#EAB839" - }, - { - "alias": "/.*sdc_.*/", - "color": "#6ED0E0" - }, - { - "alias": "/.*sdd_.*/", - "color": "#EF843C" - }, - { - "alias": "/.*sde_.*/", - "color": "#E24D42" - }, - { - "alias": "/.*sda1.*/", - "color": "#584477" - }, - { - "alias": "/.*sda2_.*/", - "color": "#BA43A9" - }, - { - "alias": "/.*sda3_.*/", - "color": "#F4D598" - }, - { - "alias": "/.*sdb1.*/", - "color": "#0A50A1" - }, - { - "alias": "/.*sdb2.*/", - "color": "#BF1B00" - }, - { - "alias": "/.*sdb3.*/", - "color": "#E0752D" - }, - { - "alias": "/.*sdc1.*/", - "color": "#962D82" - }, - { - "alias": "/.*sdc2.*/", - "color": "#614D93" - }, - { - "alias": "/.*sdc3.*/", - "color": "#9AC48A" - }, - { - "alias": "/.*sdd1.*/", - "color": "#65C5DB" - }, - { - "alias": "/.*sdd2.*/", - "color": "#F9934E" - }, - { - "alias": "/.*sdd3.*/", - "color": "#EA6460" - }, - { - "alias": "/.*sde1.*/", - "color": "#E0F9D7" - }, - { - "alias": "/.*sdd2.*/", - "color": "#FCEACA" - }, - { - "alias": "/.*sde3.*/", - "color": "#F9E2D2" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_disk_read_time_seconds_total{instance=\"$node\",job=\"$job\"}[5m])", - "hide": false, - "intervalFactor": 4, - "legendFormat": "{{device}} - Read time", - "refId": "A", - "step": 8 - }, - { - "expr": "irate(node_disk_write_time_seconds_total{instance=\"$node\",job=\"$job\"}[5m])", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{device}} - Write time", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Disk R/W Time", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "s", - "label": "time. read (-) / write (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 39 - }, - "hiddenSeries": false, - "id": 35, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideZero": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*sda_.*/", - "color": "#7EB26D" - }, - { - "alias": "/.*sdb_.*/", - "color": "#EAB839" - }, - { - "alias": "/.*sdc_.*/", - "color": "#6ED0E0" - }, - { - "alias": "/.*sdd_.*/", - "color": "#EF843C" - }, - { - "alias": "/.*sde_.*/", - "color": "#E24D42" - }, - { - "alias": "/.*sda1.*/", - "color": "#584477" - }, - { - "alias": "/.*sda2_.*/", - "color": "#BA43A9" - }, - { - "alias": "/.*sda3_.*/", - "color": "#F4D598" - }, - { - "alias": "/.*sdb1.*/", - "color": "#0A50A1" - }, - { - "alias": "/.*sdb2.*/", - "color": "#BF1B00" - }, - { - "alias": "/.*sdb3.*/", - "color": "#E0752D" - }, - { - "alias": "/.*sdc1.*/", - "color": "#962D82" - }, - { - "alias": "/.*sdc2.*/", - "color": "#614D93" - }, - { - "alias": "/.*sdc3.*/", - "color": "#9AC48A" - }, - { - "alias": "/.*sdd1.*/", - "color": "#65C5DB" - }, - { - "alias": "/.*sdd2.*/", - "color": "#F9934E" - }, - { - "alias": "/.*sdd3.*/", - "color": "#EA6460" - }, - { - "alias": "/.*sde1.*/", - "color": "#E0F9D7" - }, - { - "alias": "/.*sdd2.*/", - "color": "#FCEACA" - }, - { - "alias": "/.*sde3.*/", - "color": "#F9E2D2" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_disk_io_time_weighted_seconds_total{instance=\"$node\",job=\"$job\"}[5m])", - "intervalFactor": 4, - "legendFormat": "{{device}} - IO time weighted", - "refId": "A", - "step": 8 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Disk IOs Weighted", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "s", - "label": "time", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 49 - }, - "hiddenSeries": false, - "id": 133, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideZero": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*Read.*/", - "transform": "negative-Y" - }, - { - "alias": "/.*sda_.*/", - "color": "#7EB26D" - }, - { - "alias": "/.*sdb_.*/", - "color": "#EAB839" - }, - { - "alias": "/.*sdc_.*/", - "color": "#6ED0E0" - }, - { - "alias": "/.*sdd_.*/", - "color": "#EF843C" - }, - { - "alias": "/.*sde_.*/", - "color": "#E24D42" - }, - { - "alias": "/.*sda1.*/", - "color": "#584477" - }, - { - "alias": "/.*sda2_.*/", - "color": "#BA43A9" - }, - { - "alias": "/.*sda3_.*/", - "color": "#F4D598" - }, - { - "alias": "/.*sdb1.*/", - "color": "#0A50A1" - }, - { - "alias": "/.*sdb2.*/", - "color": "#BF1B00" - }, - { - "alias": "/.*sdb3.*/", - "color": "#E0752D" - }, - { - "alias": "/.*sdc1.*/", - "color": "#962D82" - }, - { - "alias": "/.*sdc2.*/", - "color": "#614D93" - }, - { - "alias": "/.*sdc3.*/", - "color": "#9AC48A" - }, - { - "alias": "/.*sdd1.*/", - "color": "#65C5DB" - }, - { - "alias": "/.*sdd2.*/", - "color": "#F9934E" - }, - { - "alias": "/.*sdd3.*/", - "color": "#EA6460" - }, - { - "alias": "/.*sde1.*/", - "color": "#E0F9D7" - }, - { - "alias": "/.*sdd2.*/", - "color": "#FCEACA" - }, - { - "alias": "/.*sde3.*/", - "color": "#F9E2D2" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_disk_reads_merged_total{instance=\"$node\",job=\"$job\"}[5m])", - "intervalFactor": 2, - "legendFormat": "{{device}} - Read merged", - "refId": "A", - "step": 4 - }, - { - "expr": "irate(node_disk_writes_merged_total{instance=\"$node\",job=\"$job\"}[5m])", - "intervalFactor": 2, - "legendFormat": "{{device}} - Write merged", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Disk R/W Merged", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "iops", - "label": "I/Os", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "", - "fill": 3, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 49 - }, - "hiddenSeries": false, - "id": 36, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideZero": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*sda_.*/", - "color": "#7EB26D" - }, - { - "alias": "/.*sdb_.*/", - "color": "#EAB839" - }, - { - "alias": "/.*sdc_.*/", - "color": "#6ED0E0" - }, - { - "alias": "/.*sdd_.*/", - "color": "#EF843C" - }, - { - "alias": "/.*sde_.*/", - "color": "#E24D42" - }, - { - "alias": "/.*sda1.*/", - "color": "#584477" - }, - { - "alias": "/.*sda2_.*/", - "color": "#BA43A9" - }, - { - "alias": "/.*sda3_.*/", - "color": "#F4D598" - }, - { - "alias": "/.*sdb1.*/", - "color": "#0A50A1" - }, - { - "alias": "/.*sdb2.*/", - "color": "#BF1B00" - }, - { - "alias": "/.*sdb3.*/", - "color": "#E0752D" - }, - { - "alias": "/.*sdc1.*/", - "color": "#962D82" - }, - { - "alias": "/.*sdc2.*/", - "color": "#614D93" - }, - { - "alias": "/.*sdc3.*/", - "color": "#9AC48A" - }, - { - "alias": "/.*sdd1.*/", - "color": "#65C5DB" - }, - { - "alias": "/.*sdd2.*/", - "color": "#F9934E" - }, - { - "alias": "/.*sdd3.*/", - "color": "#EA6460" - }, - { - "alias": "/.*sde1.*/", - "color": "#E0F9D7" - }, - { - "alias": "/.*sdd2.*/", - "color": "#FCEACA" - }, - { - "alias": "/.*sde3.*/", - "color": "#F9E2D2" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_disk_io_time_seconds_total{instance=\"$node\",job=\"$job\"}[5m])", - "intervalFactor": 4, - "legendFormat": "{{device}} - IO time", - "refId": "A", - "step": 8 - }, - { - "expr": "irate(node_disk_discard_time_seconds_total{instance=\"$node\",job=\"$job\"}[5m])", - "interval": "", - "intervalFactor": 4, - "legendFormat": "{{device}} - discard time", - "refId": "B", - "step": 8 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Time Spent Doing I/Os", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "s", - "label": "time", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 59 - }, - "hiddenSeries": false, - "id": 34, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideZero": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*sda_.*/", - "color": "#7EB26D" - }, - { - "alias": "/.*sdb_.*/", - "color": "#EAB839" - }, - { - "alias": "/.*sdc_.*/", - "color": "#6ED0E0" - }, - { - "alias": "/.*sdd_.*/", - "color": "#EF843C" - }, - { - "alias": "/.*sde_.*/", - "color": "#E24D42" - }, - { - "alias": "/.*sda1.*/", - "color": "#584477" - }, - { - "alias": "/.*sda2_.*/", - "color": "#BA43A9" - }, - { - "alias": "/.*sda3_.*/", - "color": "#F4D598" - }, - { - "alias": "/.*sdb1.*/", - "color": "#0A50A1" - }, - { - "alias": "/.*sdb2.*/", - "color": "#BF1B00" - }, - { - "alias": "/.*sdb3.*/", - "color": "#E0752D" - }, - { - "alias": "/.*sdc1.*/", - "color": "#962D82" - }, - { - "alias": "/.*sdc2.*/", - "color": "#614D93" - }, - { - "alias": "/.*sdc3.*/", - "color": "#9AC48A" - }, - { - "alias": "/.*sdd1.*/", - "color": "#65C5DB" - }, - { - "alias": "/.*sdd2.*/", - "color": "#F9934E" - }, - { - "alias": "/.*sdd3.*/", - "color": "#EA6460" - }, - { - "alias": "/.*sde1.*/", - "color": "#E0F9D7" - }, - { - "alias": "/.*sdd2.*/", - "color": "#FCEACA" - }, - { - "alias": "/.*sde3.*/", - "color": "#F9E2D2" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_disk_io_now{instance=\"$node\",job=\"$job\"}[5m])", - "intervalFactor": 4, - "legendFormat": "{{device}} - IO now", - "refId": "A", - "step": 8 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Disk IOs Current in Progress", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "iops", - "label": "I/Os", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 59 - }, - "hiddenSeries": false, - "id": 301, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideZero": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null as zero", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:2034", - "alias": "/.*sda_.*/", - "color": "#7EB26D" - }, - { - "$$hashKey": "object:2035", - "alias": "/.*sdb_.*/", - "color": "#EAB839" - }, - { - "$$hashKey": "object:2036", - "alias": "/.*sdc_.*/", - "color": "#6ED0E0" - }, - { - "$$hashKey": "object:2037", - "alias": "/.*sdd_.*/", - "color": "#EF843C" - }, - { - "$$hashKey": "object:2038", - "alias": "/.*sde_.*/", - "color": "#E24D42" - }, - { - "$$hashKey": "object:2039", - "alias": "/.*sda1.*/", - "color": "#584477" - }, - { - "$$hashKey": "object:2040", - "alias": "/.*sda2_.*/", - "color": "#BA43A9" - }, - { - "$$hashKey": "object:2041", - "alias": "/.*sda3_.*/", - "color": "#F4D598" - }, - { - "$$hashKey": "object:2042", - "alias": "/.*sdb1.*/", - "color": "#0A50A1" - }, - { - "$$hashKey": "object:2043", - "alias": "/.*sdb2.*/", - "color": "#BF1B00" - }, - { - "$$hashKey": "object:2044", - "alias": "/.*sdb3.*/", - "color": "#E0752D" - }, - { - "$$hashKey": "object:2045", - "alias": "/.*sdc1.*/", - "color": "#962D82" - }, - { - "$$hashKey": "object:2046", - "alias": "/.*sdc2.*/", - "color": "#614D93" - }, - { - "$$hashKey": "object:2047", - "alias": "/.*sdc3.*/", - "color": "#9AC48A" - }, - { - "$$hashKey": "object:2048", - "alias": "/.*sdd1.*/", - "color": "#65C5DB" - }, - { - "$$hashKey": "object:2049", - "alias": "/.*sdd2.*/", - "color": "#F9934E" - }, - { - "$$hashKey": "object:2050", - "alias": "/.*sdd3.*/", - "color": "#EA6460" - }, - { - "$$hashKey": "object:2051", - "alias": "/.*sde1.*/", - "color": "#E0F9D7" - }, - { - "$$hashKey": "object:2052", - "alias": "/.*sdd2.*/", - "color": "#FCEACA" - }, - { - "$$hashKey": "object:2053", - "alias": "/.*sde3.*/", - "color": "#F9E2D2" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_disk_discards_completed_total{instance=\"$node\",job=\"$job\"}[5m])", - "interval": "", - "intervalFactor": 4, - "legendFormat": "{{device}} - Discards completed", - "refId": "A", - "step": 8 - }, - { - "expr": "irate(node_disk_discards_merged_total{instance=\"$node\",job=\"$job\"}[5m])", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{device}} - Discards merged", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Disk IOps Discards completed / merged", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:2186", - "format": "iops", - "label": "IOs", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:2187", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "repeat": null, - "title": "Storage Disk", - "type": "row" - }, - { - "collapsed": true, - "datasource": "${DS_PROMETHEUS}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 29 - }, - "id": 271, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": 3, - "description": "", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 78 - }, - "hiddenSeries": false, - "id": 43, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{mountpoint}} - Available", - "metric": "", - "refId": "A", - "step": 4 - }, - { - "expr": "node_filesystem_free_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{mountpoint}} - Free", - "refId": "B", - "step": 2 - }, - { - "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "{{mountpoint}} - Size", - "refId": "C", - "step": 2 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Filesystem space available", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:3826", - "format": "bytes", - "label": "bytes", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:3827", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 78 - }, - "hiddenSeries": false, - "id": 41, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideZero": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_filesystem_files_free{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{mountpoint}} - Free file nodes", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "File Nodes Free", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:3894", - "format": "short", - "label": "file nodes", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:3895", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 88 - }, - "hiddenSeries": false, - "id": 28, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_filefd_maximum{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 4, - "legendFormat": "Max open files", - "refId": "A", - "step": 8 - }, - { - "expr": "node_filefd_allocated{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Open files", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "File Descriptor", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "files", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 88 - }, - "hiddenSeries": false, - "id": 219, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideZero": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_filesystem_files{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{mountpoint}} - File nodes total", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "File Nodes Size", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "file Nodes", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - "/ ReadOnly": "#890F02" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": null, - "description": "", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 98 - }, - "hiddenSeries": false, - "id": 44, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 6, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "node_filesystem_readonly{instance=\"$node\",job=\"$job\",device!~'rootfs'}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{mountpoint}} - ReadOnly", - "refId": "A", - "step": 4 - }, - { - "expr": "node_filesystem_device_error{instance=\"$node\",job=\"$job\",device!~'rootfs',fstype!~'tmpfs'}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{mountpoint}} - Device error", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Filesystem in ReadOnly / Error", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:3670", - "format": "short", - "label": "counter", - "logBase": 1, - "max": "1", - "min": "0", - "show": true - }, - { - "$$hashKey": "object:3671", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "repeat": null, - "title": "Storage Filesystem", - "type": "row" - }, - { - "collapsed": true, - "datasource": "${DS_PROMETHEUS}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 30 - }, - "id": 272, - "panels": [ - { - "aliasColors": { - "receive_packets_eth0": "#7EB26D", - "receive_packets_lo": "#E24D42", - "transmit_packets_eth0": "#7EB26D", - "transmit_packets_lo": "#E24D42" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 30 - }, - "hiddenSeries": false, - "id": 60, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 300, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*Trans.*/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_network_receive_packets_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{device}} - Receive", - "refId": "A", - "step": 4 - }, - { - "expr": "irate(node_network_transmit_packets_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{device}} - Transmit", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Network Traffic by Packets", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "pps", - "label": "packets out (-) / in (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 30 - }, - "hiddenSeries": false, - "id": 142, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": false, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 300, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*Trans.*/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_network_receive_errs_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{device}} - Receive errors", - "refId": "A", - "step": 4 - }, - { - "expr": "irate(node_network_transmit_errs_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{device}} - Rransmit errors", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Network Traffic Errors", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "pps", - "label": "packets out (-) / in (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 40 - }, - "hiddenSeries": false, - "id": 143, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": false, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 300, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*Trans.*/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_network_receive_drop_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{device}} - Receive drop", - "refId": "A", - "step": 4 - }, - { - "expr": "irate(node_network_transmit_drop_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{device}} - Transmit drop", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Network Traffic Drop", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "pps", - "label": "packets out (-) / in (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 40 - }, - "hiddenSeries": false, - "id": 141, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": false, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 300, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*Trans.*/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_network_receive_compressed_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{device}} - Receive compressed", - "refId": "A", - "step": 4 - }, - { - "expr": "irate(node_network_transmit_compressed_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{device}} - Transmit compressed", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Network Traffic Compressed", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "pps", - "label": "packets out (-) / in (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 50 - }, - "hiddenSeries": false, - "id": 146, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": false, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 300, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*Trans.*/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_network_receive_multicast_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{device}} - Receive multicast", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Network Traffic Multicast", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "pps", - "label": "packets out (-) / in (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 50 - }, - "hiddenSeries": false, - "id": 144, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": false, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 300, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*Trans.*/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_network_receive_fifo_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{device}} - Receive fifo", - "refId": "A", - "step": 4 - }, - { - "expr": "irate(node_network_transmit_fifo_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{device}} - Transmit fifo", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Network Traffic Fifo", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "pps", - "label": "packets out (-) / in (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 60 - }, - "hiddenSeries": false, - "id": 145, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": false, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 300, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:576", - "alias": "/.*Trans.*/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_network_receive_frame_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{device}} - Receive frame", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Network Traffic Frame", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:589", - "format": "pps", - "label": "packets out (-) / in (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:590", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 60 - }, - "hiddenSeries": false, - "id": 231, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": false, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 300, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_network_transmit_carrier_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{device}} - Statistic transmit_carrier", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Network Traffic Carrier", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "counter", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 70 - }, - "hiddenSeries": false, - "id": 232, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": false, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 300, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*Trans.*/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_network_transmit_colls_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{device}} - Transmit colls", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Network Traffic Colls", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "counter", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 70 - }, - "hiddenSeries": false, - "id": 61, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:663", - "alias": "NF conntrack limit", - "color": "#890F02", - "fill": 0 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_nf_conntrack_entries{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "NF conntrack entries", - "refId": "A", - "step": 4 - }, - { - "expr": "node_nf_conntrack_entries_limit{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "NF conntrack limit", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "NF Contrack", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:678", - "format": "short", - "label": "entries", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:679", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 80 - }, - "hiddenSeries": false, - "id": 230, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_arp_entries{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ device }} - ARP entries", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "ARP Entries", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "Entries", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 80 - }, - "hiddenSeries": false, - "id": 288, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_network_mtu_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ device }} - Bytes", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "MTU", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "bytes", - "label": "bytes", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 90 - }, - "hiddenSeries": false, - "id": 280, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_network_speed_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ device }} - Speed", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Speed", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "bytes", - "label": "bytes", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 90 - }, - "hiddenSeries": false, - "id": 289, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_network_transmit_queue_length{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ device }} - Interface transmit queue length", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Queue Length", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "none", - "label": "packets", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 100 - }, - "hiddenSeries": false, - "id": 290, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": false, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 300, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:232", - "alias": "/.*Dropped.*/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_softnet_processed_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "CPU {{cpu}} - Processed", - "refId": "A", - "step": 4 - }, - { - "expr": "irate(node_softnet_dropped_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "CPU {{cpu}} - Dropped", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Softnet Packets", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:207", - "format": "short", - "label": "packetes drop (-) / process (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:208", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 100 - }, - "hiddenSeries": false, - "id": 310, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": false, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 300, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_softnet_times_squeezed_total{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "CPU {{cpu}} - Squeezed", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Softnet Out of Quota", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:207", - "format": "short", - "label": "counter", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:208", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 110 - }, - "hiddenSeries": false, - "id": 309, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": false, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 300, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_network_up{operstate=\"up\",instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{interface}} - Operational state UP", - "refId": "A", - "step": 4 - }, - { - "expr": "node_network_carrier{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "instant": false, - "legendFormat": "{{device}} - Physical link state", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Network Operational Status", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "counter", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "repeat": null, - "title": "Network Traffic", - "type": "row" - }, - { - "collapsed": true, - "datasource": "${DS_PROMETHEUS}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 31 - }, - "id": 273, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 13 - }, - "hiddenSeries": false, - "id": 63, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": false, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 300, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_sockstat_TCP_alloc{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "TCP_alloc - Allocated sockets", - "refId": "A", - "step": 240 - }, - { - "expr": "node_sockstat_TCP_inuse{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "TCP_inuse - Tcp sockets currently in use", - "refId": "B", - "step": 240 - }, - { - "expr": "node_sockstat_TCP_mem{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "TCP_mem - Used memory for tcp", - "refId": "C", - "step": 240 - }, - { - "expr": "node_sockstat_TCP_orphan{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "TCP_orphan - Orphan sockets", - "refId": "D", - "step": 240 - }, - { - "expr": "node_sockstat_TCP_tw{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "TCP_tw - Sockets wating close", - "refId": "E", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Sockstat TCP", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "counter", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 13 - }, - "hiddenSeries": false, - "id": 124, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": false, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 300, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_sockstat_UDPLITE_inuse{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "UDPLITE_inuse - Udplite sockets currently in use", - "refId": "A", - "step": 240 - }, - { - "expr": "node_sockstat_UDP_inuse{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "UDP_inuse - Udp sockets currently in use", - "refId": "B", - "step": 240 - }, - { - "expr": "node_sockstat_UDP_mem{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "UDP_mem - Used memory for udp", - "refId": "C", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Sockstat UDP", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "counter", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 23 - }, - "hiddenSeries": false, - "id": 126, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": false, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 300, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_sockstat_sockets_used{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Sockets_used - Sockets currently in use", - "refId": "A", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Sockstat Used", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "sockets", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 23 - }, - "hiddenSeries": false, - "id": 220, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": false, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 300, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_sockstat_TCP_mem_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "mem_bytes - TCP sockets in that state", - "refId": "A", - "step": 240 - }, - { - "expr": "node_sockstat_UDP_mem_bytes{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "mem_bytes - UDP sockets in that state", - "refId": "B", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Sockstat Memory Size", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "bytes", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 33 - }, - "hiddenSeries": false, - "id": 125, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": false, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 300, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_sockstat_FRAG_inuse{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "FRAG_inuse - Frag sockets currently in use", - "refId": "A", - "step": 240 - }, - { - "expr": "node_sockstat_FRAG_memory{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "FRAG_memory - Used memory for frag", - "refId": "B", - "step": 240 - }, - { - "expr": "node_sockstat_RAW_inuse{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "RAW_inuse - Raw sockets currently in use", - "refId": "C", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Sockstat FRAG / RAW", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:1572", - "format": "short", - "label": "counter", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:1573", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "repeat": null, - "title": "Network Sockstat", - "type": "row" - }, - { - "collapsed": true, - "datasource": "${DS_PROMETHEUS}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 32 - }, - "id": 274, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 32 - }, - "height": "", - "hiddenSeries": false, - "id": 221, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": false, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 300, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 12, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:1876", - "alias": "/.*Out.*/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_netstat_IpExt_InOctets{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "InOctets - Received octets", - "refId": "A", - "step": 4 - }, - { - "expr": "irate(node_netstat_IpExt_OutOctets{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "OutOctets - Sent octets", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Netstat IP In / Out Octets", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:1889", - "format": "short", - "label": "octects out (-) / in (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:1890", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 32 - }, - "height": "", - "hiddenSeries": false, - "id": 81, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": false, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sideWidth": 300, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_netstat_Ip_Forwarding{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Forwarding - IP forwarding", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Netstat IP Forwarding", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:1957", - "format": "short", - "label": "datagrams", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:1958", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": null, - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 42 - }, - "height": "", - "hiddenSeries": false, - "id": 115, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 12, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*Out.*/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_netstat_Icmp_InMsgs{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "InMsgs - Messages which the entity received. Note that this counter includes all those counted by icmpInErrors", - "refId": "A", - "step": 4 - }, - { - "expr": "irate(node_netstat_Icmp_OutMsgs{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "OutMsgs - Messages which this entity attempted to send. Note that this counter includes all those counted by icmpOutErrors", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "ICMP In / Out", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "messages out (-) / in (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": null, - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 42 - }, - "height": "", - "hiddenSeries": false, - "id": 50, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 12, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*Out.*/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_netstat_Icmp_InErrors{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "InErrors - Messages which the entity received but determined as having ICMP-specific errors (bad ICMP checksums, bad length, etc.)", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "ICMP Errors", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "messages out (-) / in (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": null, - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 52 - }, - "height": "", - "hiddenSeries": false, - "id": 55, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 12, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*Out.*/", - "transform": "negative-Y" - }, - { - "alias": "/.*Snd.*/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_netstat_Udp_InDatagrams{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "InDatagrams - Datagrams received", - "refId": "A", - "step": 4 - }, - { - "expr": "irate(node_netstat_Udp_OutDatagrams{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "OutDatagrams - Datagrams sent", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "UDP In / Out", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "datagrams out (-) / in (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 52 - }, - "height": "", - "hiddenSeries": false, - "id": 109, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 12, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_netstat_Udp_InErrors{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "InErrors - UDP Datagrams that could not be delivered to an application", - "refId": "A", - "step": 4 - }, - { - "expr": "irate(node_netstat_Udp_NoPorts{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "NoPorts - UDP Datagrams received on a port with no listener", - "refId": "B", - "step": 4 - }, - { - "expr": "irate(node_netstat_UdpLite_InErrors{instance=\"$node\",job=\"$job\"}[5m])", - "interval": "", - "legendFormat": "InErrors Lite - UDPLite Datagrams that could not be delivered to an application", - "refId": "C" - }, - { - "expr": "irate(node_netstat_Udp_RcvbufErrors{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "RcvbufErrors - UDP buffer errors received", - "refId": "D", - "step": 4 - }, - { - "expr": "irate(node_netstat_Udp_SndbufErrors{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "SndbufErrors - UDP buffer errors send", - "refId": "E", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "UDP Errors", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:4232", - "format": "short", - "label": "datagrams", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:4233", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "decimals": null, - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 62 - }, - "height": "", - "hiddenSeries": false, - "id": 299, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 12, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*Out.*/", - "transform": "negative-Y" - }, - { - "alias": "/.*Snd.*/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_netstat_Tcp_InSegs{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "InSegs - Segments received, including those received in error. This count includes segments received on currently established connections", - "refId": "A", - "step": 4 - }, - { - "expr": "irate(node_netstat_Tcp_OutSegs{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "OutSegs - Segments sent, including those on current connections but excluding those containing only retransmitted octets", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "TCP In / Out", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "datagrams out (-) / in (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 62 - }, - "height": "", - "hiddenSeries": false, - "id": 104, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": false, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 12, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_netstat_TcpExt_ListenOverflows{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "ListenOverflows - Times the listen queue of a socket overflowed", - "refId": "A", - "step": 4 - }, - { - "expr": "irate(node_netstat_TcpExt_ListenDrops{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "ListenDrops - SYNs to LISTEN sockets ignored", - "refId": "B", - "step": 4 - }, - { - "expr": "irate(node_netstat_TcpExt_TCPSynRetrans{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "TCPSynRetrans - SYN-SYN/ACK retransmits to break down retransmissions in SYN, fast/timeout retransmits", - "refId": "C", - "step": 4 - }, - { - "expr": "irate(node_netstat_Tcp_RetransSegs{instance=\"$node\",job=\"$job\"}[5m])", - "interval": "", - "legendFormat": "RetransSegs - Segments retransmitted - that is, the number of TCP segments transmitted containing one or more previously transmitted octets", - "refId": "D" - }, - { - "expr": "irate(node_netstat_Tcp_InErrs{instance=\"$node\",job=\"$job\"}[5m])", - "interval": "", - "legendFormat": "InErrs - Segments received in error (e.g., bad TCP checksums)", - "refId": "E" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "TCP Errors", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "counter", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 72 - }, - "height": "", - "hiddenSeries": false, - "id": 85, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 12, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:454", - "alias": "/.*MaxConn *./", - "color": "#890F02", - "fill": 0 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_netstat_Tcp_CurrEstab{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "CurrEstab - TCP connections for which the current state is either ESTABLISHED or CLOSE- WAIT", - "refId": "A", - "step": 4 - }, - { - "expr": "node_netstat_Tcp_MaxConn{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "MaxConn - Limit on the total number of TCP connections the entity can support (Dinamic is \"-1\")", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "TCP Connections", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:469", - "format": "short", - "label": "connections", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:470", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 72 - }, - "height": "", - "hiddenSeries": false, - "id": 91, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": false, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 12, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/.*Sent.*/", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_netstat_TcpExt_SyncookiesFailed{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "SyncookiesFailed - Invalid SYN cookies received", - "refId": "A", - "step": 4 - }, - { - "expr": "irate(node_netstat_TcpExt_SyncookiesRecv{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "SyncookiesRecv - SYN cookies received", - "refId": "B", - "step": 4 - }, - { - "expr": "irate(node_netstat_TcpExt_SyncookiesSent{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "SyncookiesSent - SYN cookies sent", - "refId": "C", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "TCP SynCookie", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "counter out (-) / in (+)", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 82 - }, - "height": "", - "hiddenSeries": false, - "id": 82, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxPerRow": 12, - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_netstat_Tcp_ActiveOpens{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "ActiveOpens - TCP connections that have made a direct transition to the SYN-SENT state from the CLOSED state", - "refId": "A", - "step": 4 - }, - { - "expr": "irate(node_netstat_Tcp_PassiveOpens{instance=\"$node\",job=\"$job\"}[5m])", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "PassiveOpens - TCP connections that have made a direct transition to the SYN-RCVD state from the LISTEN state", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "TCP Direct Transition", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "connections", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "repeat": null, - "title": "Network Netstat", - "type": "row" - }, - { - "collapsed": true, - "datasource": "${DS_PROMETHEUS}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 33 - }, - "id": 279, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 54 - }, - "hiddenSeries": false, - "id": 40, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "show": true, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "node_scrape_collector_duration_seconds{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{collector}} - Scrape duration", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Node Exporter Scrape Time", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "s", - "label": "seconds", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "", - "fill": 2, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 54 - }, - "hiddenSeries": false, - "id": 157, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "dataLinks": [] - }, - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "$$hashKey": "object:1969", - "alias": "/.*error.*/", - "color": "#F2495C", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "node_scrape_collector_success{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{collector}} - Scrape success", - "refId": "A", - "step": 4 - }, - { - "expr": "node_textfile_scrape_error{instance=\"$node\",job=\"$job\"}", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{collector}} - Scrape textfile error (1 = true)", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Node Exporter Scrape", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:1484", - "format": "short", - "label": "counter", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:1485", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "repeat": null, - "title": "Node Exporter", - "type": "row" - } - ], - "refresh": "1m", - "schemaVersion": 22, - "style": "dark", - "tags": [ - "linux" - ], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "default", - "value": "default" - }, - "hide": 0, - "includeAll": false, - "label": "datasource", - "multi": false, - "name": "DS_PROMETHEUS", - "options": [], - "query": "prometheus", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "allValue": null, - "current": {}, - "datasource": "${DS_PROMETHEUS}", - "definition": "", - "hide": 0, - "includeAll": false, - "index": -1, - "label": "Job", - "multi": false, - "name": "job", - "options": [], - "query": "label_values(node_uname_info, job)", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": {}, - "datasource": "${DS_PROMETHEUS}", - "definition": "label_values(node_uname_info{job=\"$job\"}, instance)", - "hide": 0, - "includeAll": false, - "index": -1, - "label": "Host:", - "multi": false, - "name": "node", - "options": [], - "query": "label_values(node_uname_info{job=\"$job\"}, instance)", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "selected": false, - "text": "[a-z]+|nvme[0-9]+n[0-9]+", - "value": "[a-z]+|nvme[0-9]+n[0-9]+" - }, - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "diskdevices", - "options": [ - { - "selected": true, - "text": "[a-z]+|nvme[0-9]+n[0-9]+", - "value": "[a-z]+|nvme[0-9]+n[0-9]+" - } - ], - "query": "[a-z]+|nvme[0-9]+n[0-9]+", - "skipUrlSync": false, - "type": "custom" - } - ] - }, - "time": { - "from": "now-4h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Node Exporter", - "version": 1 -} \ No newline at end of file diff --git a/terraform-ci-infra/1n_nmd/grafana/conf/nomad.json b/terraform-ci-infra/1n_nmd/grafana/conf/nomad.json deleted file mode 100644 index 40ffeddf7b..0000000000 --- a/terraform-ci-infra/1n_nmd/grafana/conf/nomad.json +++ /dev/null @@ -1,869 +0,0 @@ -{ - "__inputs": [ - { - "name": "DS_PROMETHEUS", - "label": "Prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "5.3.2" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "5.0.0" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "5.0.0" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "description": "Nomad Jobs metrics", - "editable": true, - "gnetId": 12787, - "graphTooltip": 0, - "id": null, - "iteration": 1596708119930, - "links": [], - "panels": [ - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "${DS_PROMETHEUS}", - "decimals": null, - "format": "dtdurations", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 0, - "y": 0 - }, - "id": 16, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "alias": "", - "expr": "max(nomad_client_uptime{instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "", - "rawSql": "SELECT\n UNIX_TIMESTAMP() as time_sec,\n as value,\n as metric\nFROM \nWHERE $__timeFilter(time_column)\nORDER BY ASC\n", - "refId": "A" - } - ], - "thresholds": "", - "title": "Uptime", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorPrefix": false, - "colorValue": false, - "colors": [ - "#7eb26d", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "${DS_PROMETHEUS}", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 4, - "y": 0 - }, - "id": 17, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { - "alias": "", - "expr": "count(sum(nomad_client_allocs_memory_cache) by (exported_job))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "", - "rawSql": "SELECT\n UNIX_TIMESTAMP() as time_sec,\n as value,\n as metric\nFROM
\nWHERE $__timeFilter(time_column)\nORDER BY ASC\n", - "refId": "A" - } - ], - "thresholds": "", - "title": "Jobs", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorPrefix": false, - "colorValue": false, - "colors": [ - "#7eb26d", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "${DS_PROMETHEUS}", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 8, - "y": 0 - }, - "id": 12, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { - "alias": "", - "expr": "sum(nomad_client_allocations_running{datacenter=\"$datacenter\",instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "", - "rawSql": "SELECT\n UNIX_TIMESTAMP() as time_sec,\n as value,\n as metric\nFROM
\nWHERE $__timeFilter(time_column)\nORDER BY ASC\n", - "refId": "A" - } - ], - "thresholds": "", - "title": "Allocs", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "gridPos": { - "h": 4, - "w": 12, - "x": 12, - "y": 0 - }, - "id": 14, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "alias": "", - "expr": "sum(nomad_client_allocations_blocked{datacenter=\"$datacenter\",instance=~\"$instance\"})", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Blocked", - "rawSql": "SELECT\n UNIX_TIMESTAMP() as time_sec,\n as value,\n as metric\nFROM
\nWHERE $__timeFilter(time_column)\nORDER BY ASC\n", - "refId": "A" - }, - { - "expr": "sum(nomad_client_allocations_pending{datacenter=\"$datacenter\",instance=~\"$instance\"})", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Pending", - "refId": "B" - }, - { - "expr": "sum(nomad_client_allocations_restart{datacenter=\"$datacenter\",instance=~\"$instance\"})", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "Restart ", - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Block/Peding/Restart", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 13 - }, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": "host", - "repeatDirection": "v", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(nomad_client_allocs_cpu_total_percent[5m:10s]) > 1", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{task}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "CPU Usage Percent", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 3, - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 13 - }, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": "host", - "repeatDirection": "v", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg(nomad_client_allocs_cpu_total_ticks{instance=~\"$instance\"}) by(exported_job, task)", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{task}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "CPU Total Ticks", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 3, - "format": "timeticks", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "gridPos": { - "h": 6, - "w": 12, - "x": 0, - "y": 19 - }, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": "host", - "repeatDirection": "v", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg(nomad_client_allocs_memory_rss{instance=~\"$instance\"}) by(exported_job, task)", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{task}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "RSS", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 3, - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 19 - }, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": "host", - "repeatDirection": "v", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg(nomad_client_allocs_memory_cache{instance=~\"$instance\"}) by(exported_job, task)", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{task}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Memory Cache", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 3, - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "refresh": false, - "schemaVersion": 16, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "default", - "value": "default" - }, - "hide": 0, - "includeAll": false, - "label": "datasource", - "multi": false, - "name": "DS_PROMETHEUS", - "options": [], - "query": "prometheus", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "allValue": null, - "current": {}, - "datasource": "${DS_PROMETHEUS}", - "hide": 0, - "includeAll": false, - "label": "DC", - "multi": false, - "name": "datacenter", - "options": [], - "query": "label_values(nomad_client_uptime, datacenter)", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": {}, - "datasource": "${DS_PROMETHEUS}", - "hide": 0, - "includeAll": true, - "label": "Host", - "multi": true, - "name": "instance", - "options": [], - "query": "label_values(nomad_client_uptime{datacenter=~\"$datacenter\"}, instance)", - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-4h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Nomad", - "version": 1 -} \ No newline at end of file diff --git a/terraform-ci-infra/1n_nmd/grafana/conf/nomad/grafana.hcl b/terraform-ci-infra/1n_nmd/grafana/conf/nomad/grafana.hcl deleted file mode 100644 index a759abc4f7..0000000000 --- a/terraform-ci-infra/1n_nmd/grafana/conf/nomad/grafana.hcl +++ /dev/null @@ -1,353 +0,0 @@ -job "${job_name}" { - # The "region" parameter specifies the region in which to execute the job. - # If omitted, this inherits the default region name of "global". - # region = "global" - # - # The "datacenters" parameter specifies the list of datacenters which should - # be considered when placing this task. This must be provided. - datacenters = "${datacenters}" - - # The "type" parameter controls the type of job, which impacts the scheduler's - # decision on placement. This configuration is optional and defaults to - # "service". For a full list of job types and their differences, please see - # the online documentation. - # - # For more information, please see the online documentation at: - # - # https://www.nomadproject.io/docs/jobspec/schedulers - # - type = "service" - - update { - # The "max_parallel" parameter specifies the maximum number of updates to - # perform in parallel. In this case, this specifies to update a single task - # at a time. - max_parallel = 1 - - health_check = "checks" - - # The "min_healthy_time" parameter specifies the minimum time the allocation - # must be in the healthy state before it is marked as healthy and unblocks - # further allocations from being updated. - min_healthy_time = "10s" - - # The "healthy_deadline" parameter specifies the deadline in which the - # allocation must be marked as healthy after which the allocation is - # automatically transitioned to unhealthy. Transitioning to unhealthy will - # fail the deployment and potentially roll back the job if "auto_revert" is - # set to true. - healthy_deadline = "3m" - - # The "progress_deadline" parameter specifies the deadline in which an - # allocation must be marked as healthy. The deadline begins when the first - # allocation for the deployment is created and is reset whenever an allocation - # as part of the deployment transitions to a healthy state. If no allocation - # transitions to the healthy state before the progress deadline, the - # deployment is marked as failed. - progress_deadline = "10m" - -%{ if use_canary } - # The "canary" parameter specifies that changes to the job that would result - # in destructive updates should create the specified number of canaries - # without stopping any previous allocations. Once the operator determines the - # canaries are healthy, they can be promoted which unblocks a rolling update - # of the remaining allocations at a rate of "max_parallel". - # - # Further, setting "canary" equal to the count of the task group allows - # blue/green deployments. When the job is updated, a full set of the new - # version is deployed and upon promotion the old version is stopped. - canary = 1 - - # Specifies if the job should auto-promote to the canary version when all - # canaries become healthy during a deployment. Defaults to false which means - # canaries must be manually updated with the nomad deployment promote - # command. - auto_promote = true - - # The "auto_revert" parameter specifies if the job should auto-revert to the - # last stable job on deployment failure. A job is marked as stable if all the - # allocations as part of its deployment were marked healthy. - auto_revert = true -%{ endif } - } - - # The reschedule stanza specifies the group's rescheduling strategy. If - # specified at the job level, the configuration will apply to all groups - # within the job. If the reschedule stanza is present on both the job and the - # group, they are merged with the group stanza taking the highest precedence - # and then the job. - reschedule { - delay = "30s" - delay_function = "constant" - unlimited = true - } - - # The "group" stanza defines a series of tasks that should be co-located on - # the same Nomad client. Any task within a group will be placed on the same - # client. - # - # For more information and examples on the "group" stanza, please see - # the online documentation at: - # - # https://www.nomadproject.io/docs/job-specification/group - # - group "prod-group1-${service_name}" { - # The "count" parameter specifies the number of the task groups that should - # be running under this group. This value must be non-negative and defaults - # to 1. - count = ${group_count} - - # The restart stanza configures a tasks's behavior on task failure. Restarts - # happen on the client that is running the task. - # - # https://www.nomadproject.io/docs/job-specification/restart - # - restart { - interval = "30m" - attempts = 40 - delay = "15s" - mode = "delay" - } - - # The constraint allows restricting the set of eligible nodes. Constraints - # may filter on attributes or client metadata. - # - # For more information and examples on the "volume" stanza, please see - # the online documentation at: - # - # https://www.nomadproject.io/docs/job-specification/constraint - # - constraint { - attribute = "$${attr.cpu.arch}" - operator = "!=" - value = "arm64" - } - - # The "task" stanza creates an individual unit of work, such as a Docker - # container, web application, or batch processing. - # - # For more information and examples on the "task" stanza, please see - # the online documentation at: - # - # https://www.nomadproject.io/docs/job-specification/task - # - task "prod-task1-${service_name}" { - # The "driver" parameter specifies the task driver that should be used to - # run the task. - driver = "docker" - - # The "config" stanza specifies the driver configuration, which is passed - # directly to the driver to start the task. The details of configurations - # are specific to each driver, so please see specific driver - # documentation for more information. - config { - image = "${image}" - dns_servers = [ "172.17.0.1" ] - volumes = [ - "secrets/prometheus.yml:/etc/grafana/provisioning/datasources/prometheus.yml", - "secrets/dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml", - "secrets/grafana.ini:/etc/grafana/grafana.ini", - "secrets/node_exporter.json:/etc/grafana/provisioning/dashboards/node_exporter.json", - "secrets/docker_cadvisor.json:/etc/grafana/provisioning/dashboards/docker_cadvisor.json", - "secrets/nomad.json:/etc/grafana/provisioning/dashboards/nomad.json", - "secrets/consul.json:/etc/grafana/provisioning/dashboards/consul.json", - "secrets/prometheus.json:/etc/grafana/provisioning/dashboards/prometheus.json", - "secrets/blackbox_exporter_http.json:/etc/grafana/provisioning/dashboards/blackbox_exporter_http.json", - "secrets/blackbox_exporter_icmp.json:/etc/grafana/provisioning/dashboards/blackbox_exporter_icmp.json" - ] - } - - artifact { - # Prometheus Node Exporter - source = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/node_exporter.json" - destination = "secrets/" - } - - artifact { - # Docker cAdvisor - source = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/docker_cadvisor.json" - destination = "secrets/" - } - - artifact { - # Nomad - source = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/nomad.json" - destination = "secrets/" - } - - artifact { - # Consul - source = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/consul.json" - destination = "secrets/" - } - - artifact { - # Prometheus - source = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/prometheus.json" - destination = "secrets/" - } - - artifact { - # Prometheus Blackbox Exporter HTTP - source = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/blackbox_exporter_http.json" - destination = "secrets/" - } - - artifact { - # Prometheus Blackbox Exporter ICMP - source = "https://raw.githubusercontent.com/pmikus/grafana-dashboards/main/blackbox_exporter_icmp.json" - destination = "secrets/" - } - - # The "template" stanza instructs Nomad to manage a template, such as - # a configuration file or script. This template can optionally pull data - # from Consul or Vault to populate runtime configuration data. - # - # For more information and examples on the "template" stanza, please see - # the online documentation at: - # - # https://www.nomadproject.io/docs/job-specification/template - # - template { - change_mode = "noop" - change_signal = "SIGINT" - destination = "secrets/prometheus.yml" - data = < 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Failed Connections", - "refId": "A", - "step": 2 - }, - { - "expr": "sum(increase(prometheus_evaluator_iterations_missed_total{instance=~\"$instance\"}[5m])) > 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Missed Iterations", - "refId": "B", - "step": 2 - }, - { - "expr": "sum(increase(prometheus_evaluator_iterations_skipped_total{instance=~\"$instance\"}[5m])) > 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Skipped Iterations", - "refId": "C", - "step": 2 - }, - { - "expr": "sum(increase(prometheus_rule_evaluation_failures_total{instance=~\"$instance\"}[5m])) > 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Evaluation", - "refId": "D", - "step": 2 - }, - { - "expr": "sum(increase(prometheus_sd_azure_refresh_failures_total{instance=~\"$instance\"}[5m])) > 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Azure Refresh", - "refId": "E", - "step": 2 - }, - { - "expr": "sum(increase(prometheus_sd_consul_rpc_failures_total{instance=~\"$instance\"}[5m])) > 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Consul RPC", - "refId": "F", - "step": 2 - }, - { - "expr": "sum(increase(prometheus_sd_dns_lookup_failures_total{instance=~\"$instance\"}[5m])) > 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "DNS Lookup", - "refId": "G", - "step": 2 - }, - { - "expr": "sum(increase(prometheus_sd_ec2_refresh_failures_total{instance=~\"$instance\"}[5m])) > 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "EC2 Refresh", - "refId": "H", - "step": 2 - }, - { - "expr": "sum(increase(prometheus_sd_gce_refresh_failures_total{instance=~\"$instance\"}[5m])) > 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "GCE Refresh", - "refId": "I", - "step": 2 - }, - { - "expr": "sum(increase(prometheus_sd_marathon_refresh_failures_total{instance=~\"$instance\"}[5m])) > 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Marathon Refresh", - "refId": "J", - "step": 2 - }, - { - "expr": "sum(increase(prometheus_sd_openstack_refresh_failures_total{instance=~\"$instance\"}[5m])) > 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Openstack Refresh", - "refId": "K", - "step": 2 - }, - { - "expr": "sum(increase(prometheus_sd_triton_refresh_failures_total{instance=~\"$instance\"}[5m])) > 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Triton Refresh", - "refId": "L", - "step": 2 - }, - { - "expr": "sum(increase(prometheus_target_scrapes_exceeded_sample_limit_total{instance=~\"$instance\"}[5m])) > 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Sample Limit", - "refId": "M", - "step": 2 - }, - { - "expr": "sum(increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{instance=~\"$instance\"}[5m])) > 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Duplicate Timestamp", - "refId": "N", - "step": 2 - }, - { - "expr": "sum(increase(prometheus_target_scrapes_sample_out_of_bounds_total{instance=~\"$instance\"}[5m])) > 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Timestamp Out of Bounds", - "refId": "O", - "step": 2 - }, - { - "expr": "sum(increase(prometheus_target_scrapes_sample_out_of_order_total{instance=~\"$instance\"}[5m])) > 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Sample Out of Order", - "refId": "P", - "step": 2 - }, - { - "expr": "sum(increase(prometheus_treecache_zookeeper_failures_total{instance=~\"$instance\"}[5m])) > 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Zookeeper", - "refId": "Q", - "step": 2 - }, - { - "expr": "sum(increase(prometheus_tsdb_compactions_failed_total{instance=~\"$instance\"}[5m])) > 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "TSDB Compactions", - "refId": "R", - "step": 2 - }, - { - "expr": "sum(increase(prometheus_tsdb_head_series_not_found{instance=~\"$instance\"}[5m])) > 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Series Not Found", - "refId": "S", - "step": 2 - }, - { - "expr": "sum(increase(prometheus_tsdb_reloads_failures_total{instance=~\"$instance\"}[5m])) > 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Reload", - "refId": "T", - "step": 2 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Failures and Errors", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "Errors", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "errors", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "up{instance=~\"$instance\",job=~\"$job\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "refId": "A", - "step": 2 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Upness (stacked)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "none", - "label": "Up", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_tsdb_head_chunks{job=~\"$job\",instance=~\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Storage Memory Chunks", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "Chunks", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "up", - "titleSize": "h6" - }, - { - "collapse": false, - "height": 250, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_tsdb_head_series{job=~\"$job\",instance=~\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Series Count", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "Series", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "id": 32, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "removed", - "transform": "negative-Y" - } - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum( increase(prometheus_tsdb_head_series_created_total{instance=~\"$instance\"}[5m]) )", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "created", - "refId": "A", - "step": 4 - }, - { - "expr": "sum( increase(prometheus_tsdb_head_series_removed_total{instance=~\"$instance\"}[5m]) )", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "removed", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Series Created / Removed", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "Series Count", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "series", - "titleSize": "h6" - }, - { - "collapse": false, - "height": 250, - "panels": [ - { - "aliasColors": { - "10.58.3.10:80": "#BA43A9" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "Rate of total number of appended samples", - "fill": 1, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(prometheus_tsdb_head_samples_appended_total{job=~\"$job\",instance=~\"$instance\"}[1m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A", - "step": 2 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Appended Samples per Second", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "Samples / Second", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "appended samples", - "titleSize": "h6" - }, - { - "collapse": false, - "height": 250, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "Total number of syncs that were executed on a scrape pool.", - "fill": 1, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(prometheus_target_scrape_pool_sync_total{job=~\"$job\",instance=~\"$instance\"}) by (scrape_job)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{scrape_job}}", - "refId": "B", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Scrape Sync Total", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "Syncs", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "Actual interval to sync the scrape pool.", - "fill": 1, - "id": 21, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(prometheus_target_sync_length_seconds_sum{job=~\"$job\",instance=~\"$instance\"}[2m])) by (scrape_job) * 1000", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{scrape_job}}", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Target Sync", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "Milliseconds", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "sync", - "titleSize": "h6" - }, - { - "collapse": false, - "height": 250, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "id": 29, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "scrape_duration_seconds{instance=~\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Scrape Duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "Seconds", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "Total number of rejected scrapes", - "fill": 1, - "id": 30, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(prometheus_target_scrapes_exceeded_sample_limit_total{job=~\"$job\",instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "exceeded sample limit", - "refId": "A", - "step": 4 - }, - { - "expr": "sum(prometheus_target_scrapes_sample_duplicate_timestamp_total{job=~\"$job\",instance=~\"$instance\"})", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "duplicate timestamp", - "refId": "B", - "step": 4 - }, - { - "expr": "sum(prometheus_target_scrapes_sample_out_of_bounds_total{job=~\"$job\",instance=~\"$instance\"})", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "out of bounds", - "refId": "C", - "step": 4 - }, - { - "expr": "sum(prometheus_target_scrapes_sample_out_of_order_total{job=~\"$job\",instance=~\"$instance\"}) ", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "out of order", - "refId": "D", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Rejected Scrapes", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": "Scrapes", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "scrapes", - "titleSize": "h6" - }, - { - "collapse": false, - "height": 250, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "The duration of rule group evaluations", - "fill": 1, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "1000 * rate(prometheus_evaluator_duration_seconds_sum{job=~\"$job\", instance=~\"$instance\"}[5m]) / rate(prometheus_evaluator_duration_seconds_count{job=~\"$job\", instance=~\"$instance\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "E", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Average Rule Evaluation Duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "Milliseconds", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "id": 11, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(http_request_duration_microseconds_count{job=~\"$job\",instance=~\"$instance\"}[1m])) by (handler) > 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{handler}}", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "HTTP Request Duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "Microseconds", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "id": 15, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(prometheus_engine_query_duration_seconds_sum{job=~\"$job\",instance=~\"$instance\"}) by (slice)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{slice}}", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Prometheus Engine Query Duration Seconds", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "Seconds", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "Rule-group evaluations \n - total\n - missed due to slow rule group evaluation\n - skipped due to throttled metric storage", - "fill": 1, - "id": 31, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(prometheus_evaluator_iterations_total{job=~\"$job\", instance=~\"$instance\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Total", - "refId": "B", - "step": 4 - }, - { - "expr": "sum(rate(prometheus_evaluator_iterations_missed_total{job=~\"$job\", instance=~\"$instance\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Missed", - "refId": "A", - "step": 4 - }, - { - "expr": "sum(rate(prometheus_evaluator_iterations_skipped_total{job=~\"$job\", instance=~\"$instance\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Skipped", - "refId": "C", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Rule Evaluator Iterations", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "iterations", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "durations", - "titleSize": "h6" - }, - { - "collapse": false, - "height": 250, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "id": 22, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(prometheus_notifications_sent_total[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A", - "step": 2 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Notifications Sent", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "Notifications", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "notifications", - "titleSize": "h6" - }, - { - "collapse": false, - "height": 250, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "id": 23, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "(time() - prometheus_config_last_reload_success_timestamp_seconds{job=~\"$job\",instance=~\"$instance\"}) / 60", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Minutes Since Successful Config Reload", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "Minutes", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "id": 24, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_config_last_reload_successful{job=~\"$job\",instance=~\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A", - "step": 4 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Successful Config Reload", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": "Success", - "logBase": 1, - "max": "1", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "config", - "titleSize": "h6" - }, - { - "collapse": false, - "height": 250, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "GC invocation durations", - "fill": 1, - "id": 28, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(go_gc_duration_seconds_sum{instance=~\"$instance\",job=~\"$job\"}[2m])) by (instance)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "A", - "step": 2 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "GC Rate / 2m", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "garbage collection", - "titleSize": "h6" - }, - { - "collapse": true, - "height": 250, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "description": "This is probably wrong! Please help.", - "fill": 1, - "id": 26, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "allocated", - "stack": false - } - ], - "spaceLength": 10, - "span": 6, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(go_memstats_alloc_bytes_total{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "legendFormat": "alloc_bytes_total", - "refId": "A", - "step": 10 - }, - { - "expr": "sum(go_memstats_alloc_bytes{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "allocated", - "refId": "B", - "step": 10 - }, - { - "expr": "sum(go_memstats_buck_hash_sys_bytes{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "profiling bucket hash table", - "refId": "C", - "step": 10 - }, - { - "expr": "sum(go_memstats_gc_sys_bytes{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "GC metadata", - "refId": "D", - "step": 10 - }, - { - "expr": "sum(go_memstats_heap_alloc_bytes{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "heap in-use", - "refId": "E", - "step": 10 - }, - { - "expr": "sum(go_memstats_heap_idle_bytes{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "heap idle", - "refId": "F", - "step": 10 - }, - { - "expr": "sum(go_memstats_heap_inuse_bytes{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "heap in use", - "refId": "G", - "step": 10 - }, - { - "expr": "sum(go_memstats_heap_released_bytes{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "heap released", - "refId": "H", - "step": 10 - }, - { - "expr": "sum(go_memstats_heap_sys_bytes{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "heap system", - "refId": "I", - "step": 10 - }, - { - "expr": "sum(go_memstats_mcache_inuse_bytes{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "mcache in use", - "refId": "J", - "step": 10 - }, - { - "expr": "sum(go_memstats_mcache_sys_bytes{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "mcache sys", - "refId": "K", - "step": 10 - }, - { - "expr": "sum(go_memstats_mspan_inuse_bytes{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "mspan in use", - "refId": "L", - "step": 10 - }, - { - "expr": "sum(go_memstats_mspan_sys_bytes{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "mspan sys", - "refId": "M", - "step": 10 - }, - { - "expr": "sum(go_memstats_next_gc_bytes{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "heap next gc", - "refId": "N", - "step": 10 - }, - { - "expr": "sum(go_memstats_other_sys_bytes{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "other sys", - "refId": "O", - "step": 10 - }, - { - "expr": "sum(go_memstats_stack_inuse_bytes{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "stack in use", - "refId": "P", - "step": 10 - }, - { - "expr": "sum(go_memstats_stack_sys_bytes{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "stack sys", - "refId": "Q", - "step": 10 - }, - { - "expr": "sum(go_memstats_sys_bytes{job=~\"$job\", instance=~\"$instance\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "sys", - "refId": "R", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Go Memory Usage (FIXME)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "prometheus_target_interval_length_seconds{instance=~\"$instance\", job=~\"$job\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{quantile}} {{interval}}", - "refId": "A", - "step": 20 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Scrape Duration", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "Seconds", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fill": 1, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "span": 3, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(prometheus_target_interval_length_seconds_count{job=~\"$job\",instance=~\"$instance\"}[5m])) by (interval)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{interval}}", - "refId": "A", - "step": 20 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Target Scrapes / 5m", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "Scrapes", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Broken, ignore", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "default", - "value": "default" - }, - "hide": 0, - "includeAll": false, - "label": "datasource", - "multi": false, - "name": "DS_PROMETHEUS", - "options": [], - "query": "prometheus", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" - }, - { - "allValue": null, - "current": {}, - "datasource": "${DS_PROMETHEUS}", - "hide": 0, - "includeAll": true, - "label": null, - "multi": true, - "name": "job", - "options": [], - "query": "query_result(prometheus_tsdb_head_samples_appended_total)", - "refresh": 2, - "regex": "/.*job=\"([^\"]+)/", - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": {}, - "datasource": "${DS_PROMETHEUS}", - "hide": 0, - "includeAll": true, - "label": null, - "multi": true, - "name": "instance", - "options": [], - "query": "query_result(up{job=~\"$job\"})", - "refresh": 2, - "regex": "/.*instance=\"([^\"]+).*/", - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "selected": true, - "text": "1h", - "value": "1h" - }, - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "interval", - "options": [ - { - "selected": true, - "text": "1h", - "value": "1h" - }, - { - "selected": false, - "text": "3h", - "value": "3h" - }, - { - "selected": false, - "text": "6h", - "value": "6h" - }, - { - "selected": false, - "text": "12h", - "value": "12h" - }, - { - "selected": false, - "text": "1d", - "value": "1d" - }, - { - "selected": false, - "text": "2d", - "value": "2d" - }, - { - "selected": false, - "text": "7d", - "value": "7d" - }, - { - "selected": false, - "text": "30d", - "value": "30d" - }, - { - "selected": false, - "text": "90d", - "value": "90d" - }, - { - "selected": false, - "text": "180d", - "value": "180d" - } - ], - "query": "1h, 3h, 6h, 12h, 1d, 2d, 7d, 30d, 90d, 180d", - "type": "custom" - } - ] - }, - "time": { - "from": "now-4h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Prometheus", - "version": 1 -} diff --git a/terraform-ci-infra/1n_nmd/grafana/main.tf b/terraform-ci-infra/1n_nmd/grafana/main.tf deleted file mode 100644 index b67ba03985..0000000000 --- a/terraform-ci-infra/1n_nmd/grafana/main.tf +++ /dev/null @@ -1,24 +0,0 @@ -locals { - datacenters = join(",", var.nomad_datacenters) -} - -data "template_file" "nomad_job_grafana" { - template = file("${path.module}/conf/nomad/grafana.hcl") - vars = { - datacenters = local.datacenters - job_name = var.grafana_job_name - use_canary = var.grafana_use_canary - group_count = var.grafana_group_count - service_name = var.grafana_service_name - use_vault_provider = var.grafana_vault_secret.use_vault_provider - image = var.grafana_container_image - cpu = var.grafana_cpu - mem = var.grafana_mem - port = var.grafana_port - } -} - -resource "nomad_job" "nomad_job_grafana" { - jobspec = data.template_file.nomad_job_grafana.rendered - detach = false -} \ No newline at end of file diff --git a/terraform-ci-infra/1n_nmd/grafana/variables.tf b/terraform-ci-infra/1n_nmd/grafana/variables.tf deleted file mode 100644 index 0c2382b16a..0000000000 --- a/terraform-ci-infra/1n_nmd/grafana/variables.tf +++ /dev/null @@ -1,66 +0,0 @@ -# Nomad -variable "nomad_datacenters" { - description = "Nomad data centers" - type = list(string) - default = [ "dc1" ] -} - -# Grafana -variable "grafana_job_name" { - description = "Grafana job name" - type = string - default = "grafana" -} - -variable "grafana_group_count" { - description = "Number of grafana group instances" - type = number - default = 1 -} - -variable "grafana_service_name" { - description = "Grafana service name" - type = string - default = "grafana" -} - -variable "grafana_container_image" { - description = "Grafana docker image" - type = string - default = "grafana/grafana:7.3.7" -} - -variable "grafana_use_canary" { - description = "Uses canary deployment" - type = bool - default = false -} - -variable "grafana_vault_secret" { - description = "Set of properties to be able to fetch secret from vault" - type = object({ - use_vault_provider = bool, - vault_kv_policy_name = string, - vault_kv_path = string, - vault_kv_field_access_key = string, - vault_kv_field_secret_key = string - }) -} - -variable "grafana_cpu" { - description = "Grafana CPU allocation" - type = number - default = 2000 -} - -variable "grafana_mem" { - description = "Grafana RAM allocation" - type = number - default = 8192 -} - -variable "grafana_port" { - description = "Grafana TCP allocation" - type = number - default = 3000 -} \ No newline at end of file diff --git a/terraform-ci-infra/1n_nmd/main.tf b/terraform-ci-infra/1n_nmd/main.tf deleted file mode 100644 index e3b64f51ed..0000000000 --- a/terraform-ci-infra/1n_nmd/main.tf +++ /dev/null @@ -1,165 +0,0 @@ -# For convenience in simple configurations, a child module automatically -# inherits default (un-aliased) provider configurations from its parent. -# This means that explicit provider blocks appear only in the root module, -# and downstream modules can simply declare resources for that provider -# and have them automatically associated with the root provider -# configurations. -module "alertmanager" { - source = "./alertmanager" - providers = { - nomad = nomad.yul1 - } - - # nomad - nomad_datacenters = [ "yul1" ] - - # alertmanager - alertmanager_job_name = "prod-alertmanager" - alertmanager_use_canary = true - alertmanager_group_count = 1 - alertmanager_vault_secret = { - use_vault_provider = false, - vault_kv_policy_name = "kv-secret", - vault_kv_path = "secret/data/prometheus", - vault_kv_field_access_key = "access_key", - vault_kv_field_secret_key = "secret_key" - } - alertmanager_version = "0.21.0" - alertmanager_cpu = 1000 - alertmanager_mem = 1024 - alertmanager_port = 9093 - alertmanager_slack_jenkins_api_key = "TE07RD1V1/B01LPL8KM0F/KAd80wc9vS8CPMtrNtmQqCfT" - alertmanager_slack_jenkins_channel = "fdio-jobs-monitoring" - alertmanager_slack_default_api_key = "TE07RD1V1/B01L7PQK9S8/vJTSCr3OUprfAEGKBV5uZoJ6" - alertmanager_slack_default_channel = "fdio-infra-monitoring" -} - -module "grafana" { - source = "./grafana" - providers = { - nomad = nomad.yul1 - } - - # nomad - nomad_datacenters = [ "yul1" ] - - # grafana - grafana_job_name = "prod-grafana" - grafana_use_canary = true - grafana_group_count = 1 - grafana_vault_secret = { - use_vault_provider = false, - vault_kv_policy_name = "kv-secret", - vault_kv_path = "secret/data/grafana", - vault_kv_field_access_key = "access_key", - vault_kv_field_secret_key = "secret_key" - } - grafana_container_image = "grafana/grafana:7.3.7" - grafana_cpu = 1000 - grafana_mem = 2048 - grafana_port = 3000 -} - -module "minio" { - source = "./minio" - providers = { - nomad = nomad.yul1 - } - - # nomad - nomad_datacenters = [ "yul1" ] - nomad_host_volume = "prod-volume-data1-1" - - # minio - minio_job_name = "prod-minio" - minio_group_count = 4 - minio_service_name = "storage" - minio_host = "http://10.32.8.1{4...7}" - minio_port = 9000 - minio_container_image = "minio/minio:RELEASE.2020-12-03T05-49-24Z" - minio_vault_secret = { - use_vault_provider = false, - vault_kv_policy_name = "kv-secret", - vault_kv_path = "secret/data/minio", - vault_kv_field_access_key = "access_key", - vault_kv_field_secret_key = "secret_key" - } - minio_data_dir = "/data/" - minio_use_host_volume = true - minio_use_canary = true - minio_envs = [ "MINIO_BROWSER=\"off\"" ] - - # minio client - mc_job_name = "prod-mc" - mc_container_image = "minio/mc:RELEASE.2020-12-10T01-26-17Z" - mc_extra_commands = [ - "mc policy set public LOCALMINIO/logs.fd.io", - "mc policy set public LOCALMINIO/docs.fd.io", - "mc ilm add --expiry-days '180' LOCALMINIO/logs.fd.io", - "mc admin user add LOCALMINIO storage Storage1234", - "mc admin policy set LOCALMINIO writeonly user=storage" - ] - minio_buckets = [ "logs.fd.io", "docs.fd.io" ] -} - -module "nginx" { - source = "./nginx" - providers = { - nomad = nomad.yul1 - } - - # nomad - nomad_datacenters = [ "yul1" ] - nomad_host_volume = "prod-volume-data1-1" - - # nginx - nginx_job_name = "prod-nginx" - nginx_use_host_volume = true -} - -module "prometheus" { - source = "./prometheus" - providers = { - nomad = nomad.yul1 - } - - # nomad - nomad_datacenters = [ "yul1" ] - nomad_host_volume = "prod-volume-data1-1" - - # prometheus - prometheus_job_name = "prod-prometheus" - prometheus_use_canary = true - prometheus_group_count = 4 - prometheus_vault_secret = { - use_vault_provider = false, - vault_kv_policy_name = "kv-secret", - vault_kv_path = "secret/data/prometheus", - vault_kv_field_access_key = "access_key", - vault_kv_field_secret_key = "secret_key" - } - prometheus_data_dir = "/data/" - prometheus_use_host_volume = true - prometheus_version = "2.24.0" - prometheus_cpu = 2000 - prometheus_mem = 8192 - prometheus_port = 9090 -} - -module "vpp_device" { - source = "./vpp_device" - providers = { - nomad = nomad.yul1 - } - - # nomad - nomad_datacenters = [ "yul1" ] - - # csit_shim - csit_shim_job_name = "prod-device-csit-shim" - csit_shim_group_count = "1" - csit_shim_cpu = "1500" - csit_shim_mem = "4096" - csit_shim_image_aarch64 = "csit_shim-ubuntu1804:local" - csit_shim_image_x86_64 = "csit_shim-ubuntu1804:local" -} \ No newline at end of file diff --git a/terraform-ci-infra/1n_nmd/minio/conf/nomad/mc.hcl b/terraform-ci-infra/1n_nmd/minio/conf/nomad/mc.hcl deleted file mode 100644 index 238003bb00..0000000000 --- a/terraform-ci-infra/1n_nmd/minio/conf/nomad/mc.hcl +++ /dev/null @@ -1,73 +0,0 @@ -job "${job_name}" { - # The "region" parameter specifies the region in which to execute the job. - # If omitted, this inherits the default region name of "global". - # region = "global" - # - # The "datacenters" parameter specifies the list of datacenters which should - # be considered when placing this task. This must be provided. - datacenters = "${datacenters}" - - # The "type" parameter controls the type of job, which impacts the scheduler's - # decision on placement. This configuration is optional and defaults to - # "service". For a full list of job types and their differences, please see - # the online documentation. - # - # For more information, please see the online documentation at: - # - # https://www.nomadproject.io/docs/jobspec/schedulers.html - # - type = "batch" - - # The "group" stanza defines a series of tasks that should be co-located on - # the same Nomad client. Any task within a group will be placed on the same - # client. - # - # For more information and examples on the "group" stanza, please see - # the online documentation at: - # - # https://www.nomadproject.io/docs/job-specification/group.html - # - group "prod-group1-mc" { - task "prod-task1-create-buckets" { - # The "driver" parameter specifies the task driver that should be used to - # run the task. - driver = "docker" - - %{ if use_vault_provider } - vault { - policies = "${vault_kv_policy_name}" - } - %{ endif } - - # The "config" stanza specifies the driver configuration, which is passed - # directly to the driver to start the task. The details of configurations - # are specific to each driver, so please see specific driver - # documentation for more information. - config { - image = "${image}" - entrypoint = [ - "/bin/sh", - "-c", - "${command}" - ] - dns_servers = [ "$${attr.unique.network.ip-address}" ] - privileged = false - } - - # The env stanza configures a list of environment variables to populate - # the task's environment before starting. - env { - %{ if use_vault_provider } - {{ with secret "${vault_kv_path}" }} - MINIO_ACCESS_KEY = "{{ .Data.data.${vault_kv_field_access_key} }}" - MINIO_SECRET_KEY = "{{ .Data.data.${vault_kv_field_secret_key} }}" - {{ end }} - %{ else } - MINIO_ACCESS_KEY = "${access_key}" - MINIO_SECRET_KEY = "${secret_key}" - %{ endif } - ${ envs } - } - } - } -} diff --git a/terraform-ci-infra/1n_nmd/minio/conf/nomad/minio.hcl b/terraform-ci-infra/1n_nmd/minio/conf/nomad/minio.hcl deleted file mode 100644 index 3889b51a9f..0000000000 --- a/terraform-ci-infra/1n_nmd/minio/conf/nomad/minio.hcl +++ /dev/null @@ -1,223 +0,0 @@ -job "${job_name}" { - # The "region" parameter specifies the region in which to execute the job. - # If omitted, this inherits the default region name of "global". - # region = "global" - # - # The "datacenters" parameter specifies the list of datacenters which should - # be considered when placing this task. This must be provided. - datacenters = "${datacenters}" - - # The "type" parameter controls the type of job, which impacts the scheduler's - # decision on placement. This configuration is optional and defaults to - # "service". For a full list of job types and their differences, please see - # the online documentation. - # - # https://www.nomadproject.io/docs/jobspec/schedulers - # - type = "service" - - update { - # The "max_parallel" parameter specifies the maximum number of updates to - # perform in parallel. In this case, this specifies to update a single task - # at a time. - max_parallel = 1 - - health_check = "checks" - - # The "min_healthy_time" parameter specifies the minimum time the allocation - # must be in the healthy state before it is marked as healthy and unblocks - # further allocations from being updated. - min_healthy_time = "10s" - - # The "healthy_deadline" parameter specifies the deadline in which the - # allocation must be marked as healthy after which the allocation is - # automatically transitioned to unhealthy. Transitioning to unhealthy will - # fail the deployment and potentially roll back the job if "auto_revert" is - # set to true. - healthy_deadline = "3m" - - # The "progress_deadline" parameter specifies the deadline in which an - # allocation must be marked as healthy. The deadline begins when the first - # allocation for the deployment is created and is reset whenever an allocation - # as part of the deployment transitions to a healthy state. If no allocation - # transitions to the healthy state before the progress deadline, the - # deployment is marked as failed. - progress_deadline = "10m" - -%{ if use_canary } - # The "canary" parameter specifies that changes to the job that would result - # in destructive updates should create the specified number of canaries - # without stopping any previous allocations. Once the operator determines the - # canaries are healthy, they can be promoted which unblocks a rolling update - # of the remaining allocations at a rate of "max_parallel". - # - # Further, setting "canary" equal to the count of the task group allows - # blue/green deployments. When the job is updated, a full set of the new - # version is deployed and upon promotion the old version is stopped. - canary = 1 - - # Specifies if the job should auto-promote to the canary version when all - # canaries become healthy during a deployment. Defaults to false which means - # canaries must be manually updated with the nomad deployment promote - # command. - auto_promote = true - - # The "auto_revert" parameter specifies if the job should auto-revert to the - # last stable job on deployment failure. A job is marked as stable if all the - # allocations as part of its deployment were marked healthy. - auto_revert = true -%{ endif } - } - - # All groups in this job should be scheduled on different hosts. - constraint { - operator = "distinct_hosts" - value = "true" - } - - # The "group" stanza defines a series of tasks that should be co-located on - # the same Nomad client. Any task within a group will be placed on the same - # client. - # - # https://www.nomadproject.io/docs/job-specification/group - # - group "prod-group1-minio" { - # The "count" parameter specifies the number of the task groups that should - # be running under this group. This value must be non-negative and defaults - # to 1. - count = ${group_count} - - # https://www.nomadproject.io/docs/job-specification/volume - %{ if use_host_volume } - volume "prod-volume1-minio" { - type = "host" - read_only = false - source = "${host_volume}" - } - %{ endif } - - # The restart stanza configures a tasks's behavior on task failure. Restarts - # happen on the client that is running the task. - # - # https://www.nomadproject.io/docs/job-specification/restart - # - restart { - interval = "30m" - attempts = 40 - delay = "15s" - mode = "delay" - } - - # The "task" stanza creates an individual unit of work, such as a Docker - # container, web application, or batch processing. - # - # https://www.nomadproject.io/docs/job-specification/task.html - # - task "prod-task1-minio" { - # The "driver" parameter specifies the task driver that should be used to - # run the task. - driver = "docker" - - %{ if use_host_volume } - volume_mount { - volume = "prod-volume1-minio" - destination = "${data_dir}" - read_only = false - } - %{ endif } - - %{ if use_vault_provider } - vault { - policies = "${vault_kv_policy_name}" - } - %{ endif } - - # The "config" stanza specifies the driver configuration, which is passed - # directly to the driver to start the task. The details of configurations - # are specific to each driver, so please see specific driver - # documentation for more information. - config { - image = "${image}" - dns_servers = [ "172.17.0.1" ] - network_mode = "host" - command = "server" - args = [ "${host}:${port}${data_dir}" ] - port_map { - http = ${port} - } - privileged = false - } - - # The env stanza configures a list of environment variables to populate - # the task's environment before starting. - env { -%{ if use_vault_provider } -{{ with secret "${vault_kv_path}" }} - MINIO_ACCESS_KEY = "{{ .Data.data.${vault_kv_field_access_key} }}" - MINIO_SECRET_KEY = "{{ .Data.data.${vault_kv_field_secret_key} }}" -{{ end }} -%{ else } - MINIO_ACCESS_KEY = "${access_key}" - MINIO_SECRET_KEY = "${secret_key}" -%{ endif } - ${ envs } - } - - # The service stanza instructs Nomad to register a service with Consul. - # - # https://www.nomadproject.io/docs/job-specification/service - # - service { - name = "${service_name}" - port = "http" - tags = [ "storage$${NOMAD_ALLOC_INDEX}" ] - check { - name = "Min.io Server HTTP Check Live" - type = "http" - port = "http" - protocol = "http" - method = "GET" - path = "/minio/health/live" - interval = "10s" - timeout = "2s" - } - check { - name = "Min.io Server HTTP Check Ready" - type = "http" - port = "http" - protocol = "http" - method = "GET" - path = "/minio/health/ready" - interval = "10s" - timeout = "2s" - } - } - - # The "resources" stanza describes the requirements a task needs to - # execute. Resource requirements include memory, network, cpu, and more. - # This ensures the task will execute on a machine that contains enough - # resource capacity. - # - # https://www.nomadproject.io/docs/job-specification/resources - # - resources { - cpu = ${cpu} - memory = ${memory} - # The network stanza specifies the networking requirements for the task - # group, including the network mode and port allocations. When scheduling - # jobs in Nomad they are provisioned across your fleet of machines along - # with other jobs and services. Because you don't know in advance what host - # your job will be provisioned on, Nomad will provide your tasks with - # network configuration when they start up. - # - # https://www.nomadproject.io/docs/job-specification/network - # - network { - port "http" { - static = ${port} - } - } - } - } - } -} diff --git a/terraform-ci-infra/1n_nmd/minio/main.tf b/terraform-ci-infra/1n_nmd/minio/main.tf deleted file mode 100644 index 62d143f4b1..0000000000 --- a/terraform-ci-infra/1n_nmd/minio/main.tf +++ /dev/null @@ -1,82 +0,0 @@ -locals { - datacenters = join(",", var.nomad_datacenters) - minio_env_vars = join("\n", - concat([ - ], var.minio_envs) - ) - mc_env_vars = join("\n", - concat([ - ], var.mc_envs) - ) - mc_formatted_bucket_list = formatlist("LOCALMINIO/%s", var.minio_buckets) - mc_add_config_command = concat( - [ - "mc", - "config", - "host", - "add", - "LOCALMINIO", - "http://${var.minio_service_name}.service.consul:${var.minio_port}", - "$MINIO_ACCESS_KEY", - "$MINIO_SECRET_KEY", - ]) - mc_create_bucket_command = concat(["mc", "mb", "-p"], local.mc_formatted_bucket_list) - command = join(" ", concat(local.mc_add_config_command, ["&&"], local.mc_create_bucket_command, [";"], concat(var.mc_extra_commands))) -} - -data "template_file" "nomad_job_minio" { - template = file("${path.module}/conf/nomad/minio.hcl") - vars = { - job_name = var.minio_job_name - datacenters = local.datacenters - use_canary = var.minio_use_canary - group_count = var.minio_group_count - use_host_volume = var.minio_use_host_volume - host_volume = var.nomad_host_volume - service_name = var.minio_service_name - host = var.minio_host - port = var.minio_port - upstreams = jsonencode(var.minio_upstreams) - cpu_proxy = var.minio_resource_proxy.cpu - memory_proxy = var.minio_resource_proxy.memory - use_vault_provider = var.minio_vault_secret.use_vault_provider - image = var.minio_container_image - access_key = var.minio_access_key - secret_key = var.minio_secret_key - data_dir = var.minio_data_dir - envs = local.minio_env_vars - cpu = var.minio_cpu - memory = var.minio_memory - } -} - -data "template_file" "nomad_job_mc" { - template = file("${path.module}/conf/nomad/mc.hcl") - vars = { - job_name = var.mc_job_name - service_name = var.mc_service_name - datacenters = local.datacenters - minio_service_name = var.minio_service_name - minio_port = var.minio_port - image = var.mc_container_image - access_key = var.minio_access_key - secret_key = var.minio_secret_key - use_vault_provider = var.minio_vault_secret.use_vault_provider - envs = local.mc_env_vars - command = local.command - } -} - -resource "nomad_job" "nomad_job_minio" { - jobspec = data.template_file.nomad_job_minio.rendered - detach = false -} - -#resource "nomad_job" "nomad_job_mc" { -# jobspec = data.template_file.nomad_job_mc.rendered -# detach = false -# -# depends_on = [ -# nomad_job.nomad_job_minio -# ] -#} \ No newline at end of file diff --git a/terraform-ci-infra/1n_nmd/minio/outputs.tf b/terraform-ci-infra/1n_nmd/minio/outputs.tf deleted file mode 100644 index 309cd3b9d0..0000000000 --- a/terraform-ci-infra/1n_nmd/minio/outputs.tf +++ /dev/null @@ -1,4 +0,0 @@ -output "minio_service_name" { - description = "Minio service name" - value = data.template_file.nomad_job_minio.vars.service_name -} \ No newline at end of file diff --git a/terraform-ci-infra/1n_nmd/minio/variables.tf b/terraform-ci-infra/1n_nmd/minio/variables.tf deleted file mode 100644 index dbac3465ee..0000000000 --- a/terraform-ci-infra/1n_nmd/minio/variables.tf +++ /dev/null @@ -1,170 +0,0 @@ -# Nomad -variable "nomad_datacenters" { - description = "Nomad data centers" - type = list(string) - default = [ "dc1" ] -} - -variable "nomad_host_volume" { - description = "Nomad Host Volume" - type = string - default = "persistence" -} - -# Minio -variable "minio_job_name" { - description = "Minio job name" - type = string - default = "minio" -} - -variable "minio_service_name" { - description = "Minio service name" - type = string - default = "minio" -} - -variable "minio_group_count" { - description = "Number of Minio group instances" - type = number - default = 1 -} - -variable "minio_host" { - description = "Minio host" - type = string - default = "127.0.0.1" -} - -variable "minio_port" { - description = "Minio port" - type = number - default = 9000 -} - -variable "minio_cpu" { - description = "CPU allocation for Minio" - type = number - default = 40000 -} - -variable "minio_memory" { - description = "Memory allocation for Minio" - type = number - default = 40000 -} - -variable "minio_container_image" { - description = "Minio docker image" - type = string - default = "minio/minio:latest" -} - -variable "minio_envs" { - description = "Minio environment variables" - type = list(string) - default = [] -} - -variable "minio_access_key" { - description = "Minio access key" - type = string - default = "minio" -} - -variable "minio_secret_key" { - description = "Minio secret key" - type = string - default = "minio123" -} - -variable "minio_data_dir" { - description = "Minio server data dir" - type = string - default = "/data/" -} - -variable "minio_use_host_volume" { - description = "Use Nomad host volume feature" - type = bool - default = false -} - -variable "minio_use_canary" { - description = "Uses canary deployment for Minio" - type = bool - default = false -} - -variable "minio_vault_secret" { - description = "Set of properties to be able to fetch secret from vault" - type = object({ - use_vault_provider = bool, - vault_kv_policy_name = string, - vault_kv_path = string, - vault_kv_field_access_key = string, - vault_kv_field_secret_key = string - }) -} - -variable "minio_resource_proxy" { - description = "Minio proxy resources" - type = object({ - cpu = number, - memory = number - }) - default = { - cpu = 200, - memory = 128 - } - validation { - condition = var.minio_resource_proxy.cpu >= 200 && var.minio_resource_proxy.memory >= 128 - error_message = "Proxy resource must be at least: cpu=200, memory=128." - } -} - -# MC -variable "mc_job_name" { - description = "Minio client job name" - type = string - default = "mc" -} - -variable "mc_service_name" { - description = "Minio client service name" - type = string - default = "mc" -} - -variable "mc_container_image" { - description = "Minio client docker image" - type = string - default = "minio/mc:latest" -} - -variable "mc_envs" { - description = "Minio client environment variables" - type = list(string) - default = [] -} - -variable "minio_buckets" { - description = "List of buckets to create on startup" - type = list(string) - default = [] -} - -variable "minio_upstreams" { - description = "List of upstream services (list of object with service_name, port)" - type = list(object({ - service_name = string, - port = number, - })) - default = [] -} - -variable "mc_extra_commands" { - description = "Extra commands to run in MC container after creating buckets" - type = list(string) - default = [""] -} \ No newline at end of file diff --git a/terraform-ci-infra/1n_nmd/minio/versions.tf b/terraform-ci-infra/1n_nmd/minio/versions.tf deleted file mode 100644 index 960bd4bba6..0000000000 --- a/terraform-ci-infra/1n_nmd/minio/versions.tf +++ /dev/null @@ -1,13 +0,0 @@ -terraform { - required_providers { - nomad = { - source = "hashicorp/nomad" - version = "~> 1.4.9" - } - template = { - source = "hashicorp/template" - version = "~> 2.1.2" - } - } - required_version = ">= 0.13" -} diff --git a/terraform-ci-infra/1n_nmd/nginx/conf/nomad/nginx.hcl b/terraform-ci-infra/1n_nmd/nginx/conf/nomad/nginx.hcl deleted file mode 100644 index 0775a498da..0000000000 --- a/terraform-ci-infra/1n_nmd/nginx/conf/nomad/nginx.hcl +++ /dev/null @@ -1,283 +0,0 @@ -job "${job_name}" { - # The "region" parameter specifies the region in which to execute the job. - # If omitted, this inherits the default region name of "global". - # region = "global" - # - # The "datacenters" parameter specifies the list of datacenters which should - # be considered when placing this task. This must be provided. - datacenters = "${datacenters}" - - # The "type" parameter controls the type of job, which impacts the scheduler's - # decision on placement. This configuration is optional and defaults to - # "service". For a full list of job types and their differences, please see - # the online documentation. - # - # For more information, please see the online documentation at: - # - # https://www.nomadproject.io/docs/jobspec/schedulers.html - # - type = "service" - - update { - # The "max_parallel" parameter specifies the maximum number of updates to - # perform in parallel. In this case, this specifies to update a single task - # at a time. - max_parallel = 0 - - # The "min_healthy_time" parameter specifies the minimum time the allocation - # must be in the healthy state before it is marked as healthy and unblocks - # further allocations from being updated. - min_healthy_time = "10s" - - # The "healthy_deadline" parameter specifies the deadline in which the - # allocation must be marked as healthy after which the allocation is - # automatically transitioned to unhealthy. Transitioning to unhealthy will - # fail the deployment and potentially roll back the job if "auto_revert" is - # set to true. - healthy_deadline = "3m" - - # The "progress_deadline" parameter specifies the deadline in which an - # allocation must be marked as healthy. The deadline begins when the first - # allocation for the deployment is created and is reset whenever an allocation - # as part of the deployment transitions to a healthy state. If no allocation - # transitions to the healthy state before the progress deadline, the - # deployment is marked as failed. - progress_deadline = "10m" - - # The "auto_revert" parameter specifies if the job should auto-revert to the - # last stable job on deployment failure. A job is marked as stable if all the - # allocations as part of its deployment were marked healthy. - auto_revert = false - - # The "canary" parameter specifies that changes to the job that would result - # in destructive updates should create the specified number of canaries - # without stopping any previous allocations. Once the operator determines the - # canaries are healthy, they can be promoted which unblocks a rolling update - # of the remaining allocations at a rate of "max_parallel". - # - # Further, setting "canary" equal to the count of the task group allows - # blue/green deployments. When the job is updated, a full set of the new - # version is deployed and upon promotion the old version is stopped. - canary = 0 - } - - # The reschedule stanza specifies the group's rescheduling strategy. If - # specified at the job level, the configuration will apply to all groups - # within the job. If the reschedule stanza is present on both the job and the - # group, they are merged with the group stanza taking the highest precedence - # and then the job. - reschedule { - delay = "30s" - delay_function = "constant" - unlimited = true - } - - # The "group" stanza defines a series of tasks that should be co-located on - # the same Nomad client. Any task within a group will be placed on the same - # client. - # - # For more information and examples on the "group" stanza, please see - # the online documentation at: - # - # https://www.nomadproject.io/docs/job-specification/group.html - # - group "prod-group1-nginx" { - # The "count" parameter specifies the number of the task groups that should - # be running under this group. This value must be non-negative and defaults - # to 1. - count = 1 - - # https://www.nomadproject.io/docs/job-specification/volume - %{ if use_host_volume } - volume "prod-volume1-nginx" { - type = "host" - read_only = false - source = "${host_volume}" - } - %{ endif } - - # The restart stanza configures a tasks's behavior on task failure. Restarts - # happen on the client that is running the task. - # - # https://www.nomadproject.io/docs/job-specification/restart - # - restart { - interval = "30m" - attempts = 40 - delay = "15s" - mode = "delay" - } - - # The "task" stanza creates an individual unit of work, such as a Docker - # container, web application, or batch processing. - # - # For more information and examples on the "task" stanza, please see - # the online documentation at: - # - # https://www.nomadproject.io/docs/job-specification/task.html - # - task "prod-task1-nginx" { - # The "driver" parameter specifies the task driver that should be used to - # run the task. - driver = "docker" - - # The "config" stanza specifies the driver configuration, which is passed - # directly to the driver to start the task. The details of configurations - # are specific to each driver, so please see specific driver - # documentation for more information. - config { - image = "nginx:stable" - port_map { - https = 443 - } - privileged = false - volumes = [ - "/etc/consul.d/ssl/consul.pem:/etc/ssl/certs/nginx-cert.pem", - "/etc/consul.d/ssl/consul-key.pem:/etc/ssl/private/nginx-key.pem", - "custom/upstream.conf:/etc/nginx/conf.d/upstream.conf", - "custom/logs.conf:/etc/nginx/conf.d/logs.conf", - "custom/docs.conf:/etc/nginx/conf.d/docs.conf" - ] - } - - # The "template" stanza instructs Nomad to manage a template, such as - # a configuration file or script. This template can optionally pull data - # from Consul or Vault to populate runtime configuration data. - # - # For more information and examples on the "template" stanza, please see - # the online documentation at: - # - # https://www.nomadproject.io/docs/job-specification/template.html - # - template { - data = < jenkins_job_success{id=~".*"} - for: 0m - labels: - severity: critical - annotations: - summary: "Jenkins Job Health detected high failure rate on jenkins jobs." - description: "Job: {{ $labels.id }}" - - alert: JenkinsJobHealthExporterUnstable - expr: jenkins_job_unstable{id=~".*"} > jenkins_job_success{id=~".*"} - for: 0m - labels: - severity: warning - annotations: - summary: "Jenkins Job Health detected high unstable rate on jenkins jobs." - description: "Job: {{ $labels.id }}" -- name: "Consul" - rules: - - alert: ConsulServiceHealthcheckFailed - expr: consul_catalog_service_node_healthy == 0 - for: 0m - labels: - severity: critical - annotations: - summary: "Consul service healthcheck failed (instance {{ $labels.instance }})." - description: "Service: `{{ $labels.service_name }}` Healthcheck: `{{ $labels.service_id }}`." - - alert: ConsulMissingMasterNode - expr: consul_raft_peers < 3 - for: 0m - labels: - severity: critical - annotations: - summary: "Consul missing master node (instance {{ $labels.instance }})." - description: "Numbers of consul raft peers should be 3, in order to preserve quorum." - - alert: ConsulAgentUnhealthy - expr: consul_health_node_status{status="critical"} == 1 - for: 0m - labels: - severity: critical - annotations: - summary: "Consul agent unhealthy (instance {{ $labels.instance }})." - description: "A Consul agent is down." -- name: "Hosts" - rules: - - alert: NodeDown - expr: up == 0 - for: 0m - labels: - severity: critical - annotations: - summary: "Prometheus target missing (instance {{ $labels.instance }})." - description: "A Prometheus target has disappeared. An exporter might be crashed." - - alert: HostHighCpuLoad - expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 95 - for: 0m - labels: - severity: warning - annotations: - summary: "Host high CPU load (instance {{ $labels.instance }})." - description: "CPU load is > 95%." - - alert: HostOutOfMemory - expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10 - for: 2m - labels: - severity: warning - annotations: - summary: "Host out of memory (instance {{ $labels.instance }})." - description: "Node memory is filling up (< 10% left)." - - alert: HostOomKillDetected - expr: increase(node_vmstat_oom_kill[1m]) > 0 - for: 0m - labels: - severity: warning - annotations: - summary: "Host OOM kill detected (instance {{ $labels.instance }})." - description: "OOM kill detected." - - alert: HostMemoryUnderMemoryPressure - expr: rate(node_vmstat_pgmajfault[1m]) > 1000 - for: 2m - labels: - severity: warning - annotations: - summary: "Host memory under memory pressure (instance {{ $labels.instance }})." - description: "The node is under heavy memory pressure. High rate of major page faults." - - alert: HostOutOfDiskSpace - expr: (node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes < 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0 - for: 2m - labels: - severity: warning - annotations: - summary: "Host out of disk space (instance {{ $labels.instance }})." - description: "Disk is almost full (< 10% left)." - - alert: HostRaidDiskFailure - expr: node_md_disks{state="failed"} > 0 - for: 2m - labels: - severity: warning - annotations: - summary: "Host RAID disk failure (instance {{ $labels.instance }})." - description: "At least one device in RAID array on {{ $labels.instance }} failed. Array {{ $labels.md_device }} needs attention and possibly a disk swap." - - alert: HostConntrackLimit - expr: node_nf_conntrack_entries / node_nf_conntrack_entries_limit > 0.8 - for: 5m - labels: - severity: warning - annotations: - summary: "Host conntrack limit (instance {{ $labels.instance }})." - description: "The number of conntrack is approching limit." - - alert: HostNetworkInterfaceSaturated - expr: (rate(node_network_receive_bytes_total{device!~"^tap.*"}[1m]) + rate(node_network_transmit_bytes_total{device!~"^tap.*"}[1m])) / node_network_speed_bytes{device!~"^tap.*"} > 0.8 - for: 1m - labels: - severity: warning - annotations: - summary: "Host Network Interface Saturated (instance {{ $labels.instance }})." - description: "The network interface {{ $labels.interface }} on {{ $labels.instance }} is getting overloaded." - - alert: HostSystemdServiceCrashed - expr: node_systemd_unit_state{state="failed"} == 1 - for: 0m - labels: - severity: warning - annotations: - summary: "Host SystemD service crashed (instance {{ $labels.instance }})." - description: "SystemD service crashed." - - alert: HostEdacCorrectableErrorsDetected - expr: increase(node_edac_correctable_errors_total[1m]) > 0 - for: 0m - labels: - severity: info - annotations: - summary: "Host EDAC Correctable Errors detected (instance {{ $labels.instance }})." - description: '{{ $labels.instance }} has had {{ printf "%.0f" $value }} correctable memory errors reported by EDAC in the last 5 minutes.' - - alert: HostEdacUncorrectableErrorsDetected - expr: node_edac_uncorrectable_errors_total > 0 - for: 0m - labels: - severity: warning - annotations: - summary: "Host EDAC Uncorrectable Errors detected (instance {{ $labels.instance }})." - description: '{{ $labels.instance }} has had {{ printf "%.0f" $value }} uncorrectable memory errors reported by EDAC in the last 5 minutes.' -- name: "Min.io" - rules: - - alert: MinioDiskOffline - expr: minio_offline_disks > 0 - for: 0m - labels: - severity: critical - annotations: - summary: "Minio disk offline (instance {{ $labels.instance }})" - description: "Minio disk is offline." - - alert: MinioStorageSpaceExhausted - expr: minio_disk_storage_free_bytes / 1024 / 1024 / 1024 < 10 - for: 2m - labels: - severity: warning - annotations: - summary: "Minio storage space exhausted (instance {{ $labels.instance }})." - description: "Minio storage space is low (< 10 GB)." -- name: "Prometheus" - rules: - - alert: PrometheusConfigurationReloadFailure - expr: prometheus_config_last_reload_successful != 1 - for: 0m - labels: - severity: warning - annotations: - summary: "Prometheus configuration reload failure (instance {{ $labels.instance }})." - description: "Prometheus configuration reload error." - - alert: PrometheusTooManyRestarts - expr: changes(process_start_time_seconds{job=~"prometheus|pushgateway|alertmanager"}[15m]) > 2 - for: 0m - labels: - severity: warning - annotations: - summary: "Prometheus too many restarts (instance {{ $labels.instance }})." - description: "Prometheus has restarted more than twice in the last 15 minutes. It might be crashlooping." - - alert: PrometheusAlertmanagerConfigurationReloadFailure - expr: alertmanager_config_last_reload_successful != 1 - for: 0m - labels: - severity: warning - annotations: - summary: "Prometheus AlertManager configuration reload failure (instance {{ $labels.instance }})." - description: "AlertManager configuration reload error." - - alert: PrometheusRuleEvaluationFailures - expr: increase(prometheus_rule_evaluation_failures_total[3m]) > 0 - for: 0m - labels: - severity: critical - annotations: - summary: "Prometheus rule evaluation failures (instance {{ $labels.instance }})." - description: "Prometheus encountered {{ $value }} rule evaluation failures, leading to potentially ignored alerts." - - alert: PrometheusTargetScrapingSlow - expr: prometheus_target_interval_length_seconds{quantile="0.9"} > 60 - for: 5m - labels: - severity: warning - annotations: - summary: "Prometheus target scraping slow (instance {{ $labels.instance }})." - description: "Prometheus is scraping exporters slowly." - - alert: PrometheusTsdbCompactionsFailed - expr: increase(prometheus_tsdb_compactions_failed_total[1m]) > 0 - for: 0m - labels: - severity: critical - annotations: - summary: "Prometheus TSDB compactions failed (instance {{ $labels.instance }})." - description: "Prometheus encountered {{ $value }} TSDB compactions failures." - - alert: PrometheusTsdbHeadTruncationsFailed - expr: increase(prometheus_tsdb_head_truncations_failed_total[1m]) > 0 - for: 0m - labels: - severity: critical - annotations: - summary: "Prometheus TSDB head truncations failed (instance {{ $labels.instance }})." - description: "Prometheus encountered {{ $value }} TSDB head truncation failures." - - alert: PrometheusTsdbWalCorruptions - expr: increase(prometheus_tsdb_wal_corruptions_total[1m]) > 0 - for: 0m - labels: - severity: critical - annotations: - summary: "Prometheus TSDB WAL corruptions (instance {{ $labels.instance }})." - description: "Prometheus encountered {{ $value }} TSDB WAL corruptions." - - alert: PrometheusTsdbWalTruncationsFailed - expr: increase(prometheus_tsdb_wal_truncations_failed_total[1m]) > 0 - for: 0m - labels: - severity: critical - annotations: - summary: "Prometheus TSDB WAL truncations failed (instance {{ $labels.instance }})." - description: "Prometheus encountered {{ $value }} TSDB WAL truncation failures." -EOH - } - - template { - change_mode = "noop" - change_signal = "SIGINT" - destination = "secrets/prometheus.yml" - data = <= 2.1.21 - - Configured with personal "AWS Access Key ID" and "AWS Secret Access Key" - - See: https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2-linux.html - -terraform >= v0.13 - - Terraform's Ansible provisioner requires manual installation - - see: https://github.com/radekg/terraform-provisioner-ansible - - Tested on v2.5.0 - - -Azure: ----------------------- -Testbed deployment - Microsoft Azure -- ./3n_azure_fsv2/ -- cgit 1.2.3-korg