Commit 9c43b33a authored by godog

prometheus: use node_exporter 0.16 metric names

parent 0be0500f
Pipeline #5288 canceled with stage
in 0 seconds
......@@ -2,7 +2,7 @@ groups:
- name: roles/prometheus/files/rules/alerts_disk.conf
rules:
- alert: DiskWillFillIn4Hours
expr: predict_linear(node_filesystem_free{job="node"}[1h], 4 * 3600) < 0
expr: predict_linear(node_filesystem_free_bytes{job="node"}[1h], 4 * 3600) < 0
for: 5m
labels:
severity: page
......
......@@ -10,7 +10,7 @@ groups:
description: 'Conntrack table on {{ $labels.instance }} is more than 90% full.'
summary: 'Conntrack table on {{ $labels.instance }} is almost full'
- alert: NetworkErrors
expr: instance:node_network_errs:rate5m > 1
expr: instance:node_network_errs_total:rate5m > 1
for: 15m
labels:
severity: page
......@@ -18,7 +18,7 @@ groups:
summary: 'High rate of packet errors on {{ $labels.instance }}/{{ $labels.device }}'
description: 'High rate of packet errors on {{ $labels.instance }} device {{ $labels.device }}.'
- alert: NetworkDrops
expr: instance:node_network_drop:rate5m > 1
expr: instance:node_network_drop_total:rate5m > 1
for: 15m
labels:
severity: page
......
......@@ -2,10 +2,10 @@ groups:
- name: roles/prometheus/files/rules/rules_cpu.conf
rules:
- record: instance:node_cpus:count
expr: count(node_cpu{mode="idle"}) by (host, instance)
expr: count(node_cpu_seconds_total{mode="idle"}) by (host, instance)
- record: instance_mode:node_cpu:rate5m
expr: sum(rate(node_cpu[5m])) by (host, instance, mode)
expr: sum(rate(node_cpu_seconds_total[5m])) by (host, instance, mode)
- record: instance_utilization:node_cpu:rate5m
expr: sum(rate(node_cpu{mode!="idle"}[5m])) by (host, instance)
expr: sum(rate(node_cpu_seconds_total{mode!="idle"}[5m])) by (host, instance)
- record: instance_utilization:rate5m
expr: instance_utilization:node_cpu:rate5m / instance:node_cpus:count
groups:
- name: roles/prometheus/files/rules/rules_disk.conf
rules:
- record: instance:node_disk_writes_completed:irate1m
expr: sum(irate(node_disk_writes_completed{device=~"sd.*"}[1m])) WITHOUT (device)
- record: instance:node_disk_reads_completed:irate1m
expr: sum(irate(node_disk_reads_completed{device=~"sd.*"}[1m])) WITHOUT (device)
- record: instance:node_disk_writes_completed_total:irate1m
expr: sum(irate(node_disk_writes_completed_total{device=~"sd.*"}[1m])) WITHOUT (device)
- record: instance:node_disk_reads_completed_total:irate1m
expr: sum(irate(node_disk_reads_completed_total{device=~"sd.*"}[1m])) WITHOUT (device)
......@@ -3,27 +3,27 @@ groups:
rules:
- record: instance:conntrack_full:ratio
expr: node_nf_conntrack_entries / node_nf_conntrack_entries_limit
- record: instance:node_network_errs
expr: node_network_receive_errs + node_network_transmit_errs
- record: instance:node_network_drop
expr: node_network_receive_drop + node_network_transmit_drop
- record: instance:node_network_errs:rate5m
expr: rate(instance:node_network_errs[5m])
- record: instance:node_network_drop:rate5m
expr: rate(instance:node_network_drop[5m])
- record: instance:network_transmit_bytes:rate5m
expr: sum(rate(node_network_transmit_bytes{device!="lo"}[5m])) without (device)
- record: instance:network_receive_bytes:rate5m
expr: sum(rate(node_network_receive_bytes{device!="lo"}[5m])) without (device)
- record: instance:public_network_transmit_bytes:rate5m
expr: sum(rate(node_network_transmit_bytes{device=~"(eth|e[nl][op]).*"}[5m])) without (device)
- record: instance:public_network_receive_bytes:rate5m
expr: sum(rate(node_network_receive_bytes{device=~"(eth|e[nl][op]).*"}[5m])) without (device)
- record: global:public_network_transmit_bytes:rate5m
expr: sum(instance:public_network_transmit_bytes:rate5m) without (instance, host)
- record: global:public_network_receive_bytes:rate5m
expr: sum(instance:public_network_receive_bytes:rate5m) without (instance, host)
- record: global:network_transmit_bytes:rate5m
expr: sum(instance:network_transmit_bytes:rate5m) without (instance, host)
- record: global:network_receive_bytes:rate5m
expr: sum(instance:network_receive_bytes:rate5m) without (instance, host)
- record: instance:node_network_errs_total
expr: node_network_receive_errs_total + node_network_transmit_errs_total
- record: instance:node_network_drop_total
expr: node_network_receive_drop_total + node_network_transmit_drop_total
- record: instance:node_network_errs_total:rate5m
expr: rate(instance:node_network_errs_total[5m])
- record: instance:node_network_drop_total:rate5m
expr: rate(instance:node_network_drop_total[5m])
- record: instance:network_transmit_bytes_total:rate5m
expr: sum(rate(node_network_transmit_bytes_total{device!="lo"}[5m])) without (device)
- record: instance:network_receive_bytes_total:rate5m
expr: sum(rate(node_network_receive_bytes_total{device!="lo"}[5m])) without (device)
- record: instance:public_network_transmit_bytes_total:rate5m
expr: sum(rate(node_network_transmit_bytes_total{device=~"(eth|e[nl][op]).*"}[5m])) without (device)
- record: instance:public_network_receive_bytes_total:rate5m
expr: sum(rate(node_network_receive_bytes_total{device=~"(eth|e[nl][op]).*"}[5m])) without (device)
- record: global:public_network_transmit_bytes_total:rate5m
expr: sum(instance:public_network_transmit_bytes_total:rate5m) without (instance, host)
- record: global:public_network_receive_bytes_total:rate5m
expr: sum(instance:public_network_receive_bytes_total:rate5m) without (instance, host)
- record: global:network_transmit_bytes_total:rate5m
expr: sum(instance:network_transmit_bytes_total:rate5m) without (instance, host)
- record: global:network_receive_bytes_total:rate5m
expr: sum(instance:network_receive_bytes_total:rate5m) without (instance, host)
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment