Skip to content
Snippets Groups Projects
Commit 795220f2 authored by ale
Browse files

Add the prober_float_service to the ProbeFailure alerts

Extends the alert description with a link to the specific prober dashboard.
parent 6f1ac8c2
No related branches found
No related tags found
No related merge requests found
......@@ -73,8 +73,11 @@ groups:
scope: host
annotations:
summary: 'Probe {{ $labels.probe }}@{{ $labels.host }} is failing'
description: 'Probe {{ $labels.probe }} ({{ $labels.host }}) is failing
for target {{ $labels.host }} (success ratio {{ $value }}).'
description: >-
Probe {{ $labels.probe }} ({{ $labels.host }}) is failing for target {{ $labels.host }}
(success ratio {{ $value }}).
Failed probe logs: https://{{ $labels.prober_float_service }}.[[ domain_public[0] ]]/
runbook: '[[ alert_runbook_fmt | format("ProbeFailure") ]]'
- alert: ProbeFailure
......@@ -85,8 +88,11 @@ groups:
scope: global
annotations:
summary: 'Probe {{ $labels.probe }} is failing globally'
description: 'Probe {{ $labels.probe }} ({{ $labels.zone }}) is failing
globally (success ratio {{ $value }}).'
description: >-
Probe {{ $labels.probe }} ({{ $labels.zone }}) is failing globally
(success ratio {{ $value }}).
Failed probe logs: https://{{ $labels.prober_float_service }}.[[ domain_public[0] ]]/
runbook: '[[ alert_runbook_fmt | format("ProbeFailure") ]]'
- alert: CronJobFailure
......
......@@ -11,17 +11,17 @@ groups:
# Sum prober metrics over the probers (hosts), producing
# an aggregation by target.
- record: target:probe_success:count
expr: count(probe_success) by (probe,probeset,zone,host)
expr: count(probe_success) by (probe,probeset,zone,host,prober_float_service)
- record: target:probe_success:sum
expr: sum(probe_success) by (probe,probeset,zone,host)
expr: sum(probe_success) by (probe,probeset,zone,host,prober_float_service)
- record: target:probe_success:ratio
expr: target:probe_success:sum / target:probe_success:count
# Sum prober metrics over targets, aggregating by probe.
- record: probe:probe_success:count
expr: count(probe_success) by (probe,probeset,zone)
expr: count(probe_success) by (probe,probeset,prober_float_service,zone)
- record: probe:probe_success:sum
expr: sum(probe_success) by (probe,probeset,zone)
expr: sum(probe_success) by (probe,probeset,prober_float_service,zone)
- record: probe:probe_success:ratio
expr: probe:probe_success:sum / probe:probe_success:count
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment