From 5f1278eb61501f031e63bd79e0c98b3f57f5cd34 Mon Sep 17 00:00:00 2001
From: ale <ale@incal.net>
Date: Wed, 24 Feb 2021 15:34:10 +0000
Subject: [PATCH] Replace the "target" field with "host" on prober metrics

The built-in ping, HTTP and DNS blackbox probes now store the name of
the probed machine (the address up to its first dot) in the "host" label
instead of "target". The name of the machine running the prober, which
used to be kept in "host", moves to a new "prober_host" label.

The recording rules aggregate by "host", so host_reachable no longer
needs label_replace() to copy "target" into "host", and the ProbeFailure
annotations read the probed machine from "host".

Additional blackbox probers (prometheus_additional_blackbox_probers)
also get the "prober_host" label and may set "target_label_regex" to
control how the name is extracted from the address (default: everything
before the first ":"). They still write the extracted name to "target".

---
Review notes (ignored by git am):
 * The ProbeFailure description keeps the zone in its parenthetical; only
   the "target" references are switched to "host".
 * The post-image blob id on the alerts_base.conf.yml index line was
   computed before that adjustment and is informational only.

 .../templates/prometheus.yml.j2               | 22 +++++++++----------
 .../templates/rules/alerts_base.conf.yml      |  4 ++--
 .../templates/rules/rules_base.conf.yml       | 12 +++++-----
 3 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/roles/float-infra-prometheus/templates/prometheus.yml.j2 b/roles/float-infra-prometheus/templates/prometheus.yml.j2
index e2c21a4c..d1209843 100644
--- a/roles/float-infra-prometheus/templates/prometheus.yml.j2
+++ b/roles/float-infra-prometheus/templates/prometheus.yml.j2
@@ -108,7 +108,7 @@ scrape_configs:
         - ping
     relabel_configs:
       - source_labels: [__address__]
-        target_label: target
+        target_label: host
         regex: "([^.]*).*"
         replacement: "${1}"
       - source_labels: [__address__]
@@ -117,7 +117,7 @@ scrape_configs:
         target_label: instance
       - target_label: __address__
         replacement: {{ prober_host }}.prometheus.{{ domain }}:9115
-      - target_label: host
+      - target_label: prober_host
         replacement: {{ prober_host }}
     static_configs:
       - targets:
@@ -137,7 +137,7 @@ scrape_configs:
         - http_base
     relabel_configs:
       - source_labels: [__address__]
-        target_label: target
+        target_label: host
         regex: "https?://([^.:/]*).*"
         replacement: "${1}"
       - source_labels: [__address__]
@@ -146,7 +146,7 @@ scrape_configs:
         target_label: instance
       - target_label: __address__
         replacement: {{ prober_host }}.prometheus.{{ domain }}:9115
-      - target_label: host
+      - target_label: prober_host
         replacement: {{ prober_host }}
     static_configs:
       - targets:
@@ -165,7 +165,7 @@ scrape_configs:
       module: [dns_toplevel]
     relabel_configs:
       - source_labels: [__address__]
-        target_label: target
+        target_label: host
         regex: "([^.]*).*"
         replacement: "${1}"
       - source_labels: [__address__]
@@ -174,7 +174,7 @@ scrape_configs:
         target_label: instance
       - target_label: __address__
         replacement: {{ prober_host }}.prometheus.{{ domain }}:9115
-      - target_label: host
+      - target_label: prober_host
         replacement: {{ prober_host }}
     static_configs:
       - targets:
@@ -191,7 +191,7 @@ scrape_configs:

 {# Additional blackbox probers #}
 {% for p in prometheus_additional_blackbox_probers | default([]) | sort(attribute='name') %}
-{% for host in groups[p.service] | sort %}
+{% for prober_host in groups[p.service] | sort %}
   - job_name: "prober_{{ p.name }}_{{ loop.index }}"
     metrics_path: "/probe"
     params:
@@ -200,16 +200,16 @@ scrape_configs:
     relabel_configs:
       - source_labels: [__address__]
         target_label: target
-        regex: "(?:https?://)?([^.:/]*).*"
+        regex: "{{ p.target_label_regex | default('([^:]*):.*') }}"
         replacement: "${1}"
       - source_labels: [__address__]
         target_label: __param_target
       - source_labels: [__param_target]
         target_label: instance
       - target_label: __address__
-        replacement: {{ host }}.{{ p.service }}.{{ domain }}:{{ p.port }}
-      - target_label: host
-        replacement: {{ host }}
+        replacement: {{ prober_host }}.{{ p.service }}.{{ domain }}:{{ p.port }}
+      - target_label: prober_host
+        replacement: {{ prober_host }}
     static_configs:
       - targets:
 {#
diff --git a/roles/float-infra-prometheus/templates/rules/alerts_base.conf.yml b/roles/float-infra-prometheus/templates/rules/alerts_base.conf.yml
index ab5bbb00..6e8a5541 100644
--- a/roles/float-infra-prometheus/templates/rules/alerts_base.conf.yml
+++ b/roles/float-infra-prometheus/templates/rules/alerts_base.conf.yml
@@ -72,9 +72,9 @@ groups:
       severity: page
       scope: host
     annotations:
-      summary: 'Probe {{ $labels.probe }}@{{ $labels.target }} is failing'
+      summary: 'Probe {{ $labels.probe }}@{{ $labels.host }} is failing'
       description: 'Probe {{ $labels.probe }} ({{ $labels.zone }}) is failing
-        for target {{ $labels.target }} (success ratio {{ $value }}).'
+        for target {{ $labels.host }} (success ratio {{ $value }}).'
       runbook: '[[ alert_runbook_fmt | format("ProbeFailure") ]]'

   - alert: ProbeFailure
diff --git a/roles/float-infra-prometheus/templates/rules/rules_base.conf.yml b/roles/float-infra-prometheus/templates/rules/rules_base.conf.yml
index 1dd30827..456c1ecb 100644
--- a/roles/float-infra-prometheus/templates/rules/rules_base.conf.yml
+++ b/roles/float-infra-prometheus/templates/rules/rules_base.conf.yml
@@ -11,9 +11,9 @@ groups:
   # Sum prober metrics over the probers (hosts), producing
   # an aggregation by target.
   - record: target:probe_success:count
-    expr: count(probe_success) by (probe,probeset,zone,target)
+    expr: count(probe_success) by (probe,probeset,zone,host)
   - record: target:probe_success:sum
-    expr: sum(probe_success) by (probe,probeset,zone,target)
+    expr: sum(probe_success) by (probe,probeset,zone,host)
   - record: target:probe_success:ratio
     expr: target:probe_success:sum / target:probe_success:count

@@ -25,9 +25,7 @@ groups:
   - record: probe:probe_success:ratio
     expr: probe:probe_success:sum / probe:probe_success:count

-  # Special metric for the ping probe. The label_replace() sets
-  # the host to the value of the target label (instead of the host
-  # running the prober). The 'bool' qualifier makes the greater-than
-  # operation not act as a filter.
+  # Special metric for the ping probe.
+  # The 'bool' qualifier makes the greater-than operation not act as a filter.
   - record: host_reachable
-    expr: label_replace(target:probe_success:ratio{probe="ping"} > bool 0.6, "host", "$1", "target", "(.*)")
+    expr: target:probe_success:ratio{probe="ping"} > bool 0.6
--
GitLab