Commit 7591ad0c authored by godog

prometheus: use 'summary' as alerts label, not title

parent 1acad215
Pipeline #6695 passed with stage
in 6 minutes and 15 seconds
@@ -11,7 +11,7 @@ groups:
annotations:
description: '{{$labels.job}} at {{$labels.instance}} has a 90th percentile
latency of {{$value}}s completing rule evaluation cycles.'
title: '{{$labels.job}} is evaluating rules too slowly'
summary: '{{$labels.job}} is evaluating rules too slowly'
- alert: PrometheusCheckpointingSlow
expr: avg_over_time(prometheus_local_storage_checkpoint_last_duration_seconds{job=~"prometheus.*"}[15m])
@@ -23,7 +23,7 @@ groups:
annotations:
description: '{{$labels.job}} at {{$labels.instance}} needs {{$value}}s on average
for each checkpoint.'
title: '{{$labels.job}} is checkpointing too slowly'
summary: '{{$labels.job}} is checkpointing too slowly'
- alert: PrometheusIndexingBacklog
expr: prometheus_local_storage_indexing_queue_length{job=~"prometheus.*"} / prometheus_local_storage_indexing_queue_capacity{job=~"prometheus.*"}
@@ -36,7 +36,7 @@ groups:
description: '{{$labels.job}} at {{$labels.instance}} is backlogging on the
indexing queue for more than 30m. Queue is currently {{$value | printf `%.0f`}}%
full.'
title: '{{$labels.job}} is backlogging on the indexing queue'
summary: '{{$labels.job}} is backlogging on the indexing queue'
- alert: PrometheusNotIngestingSamples
expr: rate(prometheus_local_storage_ingested_samples_total{job=~"prometheus.*"}[5m])
@@ -48,7 +48,7 @@ groups:
annotations:
description: '{{$labels.job}} at {{$labels.instance}} has not ingested any samples
in the last 10 minutes.'
title: '{{$labels.job}} is not ingesting samples'
summary: '{{$labels.job}} is not ingesting samples'
runbook: '[[ alert_playbook_url ]]/PrometheusNotIngestingSamples.md'
- alert: PrometheusPersistErrors
@@ -60,7 +60,7 @@ groups:
annotations:
description: '{{$labels.job}} at {{$labels.instance}} has encountered {{$value}}
persist errors per second in the last 10 minutes.'
title: '{{$labels.job}} has persist errors'
summary: '{{$labels.job}} has persist errors'
- alert: PrometheusNotificationsBacklog
expr: prometheus_notifications_queue_length{job=~"prometheus.*"} > 0
@@ -72,7 +72,7 @@ groups:
description: '{{$labels.job}} at {{$labels.instance}} is backlogging on the
notifications queue. The queue has not been empty for 10 minutes. Current
queue length: {{$value}}.'
title: '{{$labels.job}} is backlogging on the notifications queue'
summary: '{{$labels.job}} is backlogging on the notifications queue'
runbook: '[[ alert_playbook_url ]]/PrometheusNotificationsBacklog.md'
- alert: PrometheusScrapingSlowly
@@ -86,7 +86,7 @@ groups:
description: '{{$labels.job}} at {{$labels.instance}} has a 90th percentile
latency of {{$value}}s for scraping targets in the {{$labels.interval}} target
pool.'
title: '{{$labels.job}} is scraping targets slowly'
summary: '{{$labels.job}} is scraping targets slowly'
- alert: PrometheusStorageInconsistent
expr: prometheus_local_storage_inconsistencies_total{job=~"prometheus.*"} > 0
@@ -96,7 +96,7 @@ groups:
annotations:
description: '{{$labels.job}} at {{$labels.instance}} has detected a storage
inconsistency. A server restart is needed to initiate recovery.'
title: '{{$labels.job}} has an inconsistent storage'
summary: '{{$labels.job}} has an inconsistent storage'
- alert: PrometheusPersistencePressureTooHigh
expr: prometheus_local_storage_persistence_urgency_score{job=~"prometheus.*"}
@@ -109,7 +109,7 @@ groups:
annotations:
description: '{{$labels.job}} at {{$labels.instance}} is approaching critical
persistence pressure. Throttled ingestion expected within the next 24h.'
title: '{{$labels.job}} can not keep up persisting'
summary: '{{$labels.job}} can not keep up persisting'
- alert: PrometheusPersistencePressureTooHigh
expr: prometheus_local_storage_persistence_urgency_score{job=~"prometheus.*"}
@@ -122,7 +122,7 @@ groups:
annotations:
description: '{{$labels.job}} at {{$labels.instance}} is approaching critical
persistence pressure. Throttled ingestion expected within the next 2h.'
title: '{{$labels.job}} can not keep up persisting'
summary: '{{$labels.job}} can not keep up persisting'
runbook: '[[ alert_playbook_url ]]/PrometheusPersistencePressureTooHigh.md'
- alert: PrometheusSeriesMaintenanceStalled
@@ -137,7 +137,7 @@ groups:
description: '{{$labels.job}} at {{$labels.instance}} is maintaining memory
time series so slowly that it will take {{$value | printf `%.0f`}}h to complete
a full cycle. This will lead to persistence falling behind.'
title: '{{$labels.job}} is maintaining memory time series too slowly'
summary: '{{$labels.job}} is maintaining memory time series too slowly'
- alert: PrometheusInvalidConfigFile
expr: prometheus_config_last_reload_successful{job=~"prometheus.*"} == 0
@@ -148,7 +148,7 @@ groups:
annotations:
description: The configuration file for {{$labels.job}} at {{$labels.instance}}
is invalid and was therefore not reloaded.
title: '{{$labels.job}} has an invalid config'
summary: '{{$labels.job}} has an invalid config'
runbook: '[[ alert_playbook_url ]]/PrometheusInvalidConfigFile.md'
- alert: PrometheusOutOfOrderSamplesDiscarded
@@ -161,4 +161,4 @@ groups:
annotations:
description: '{{$labels.job}} at {{$labels.instance}} has discarded {{$value}}
out-of-order samples over the last hour.'
title: '{{$labels.job}} is discarding out-of-order samples'
summary: '{{$labels.job}} is discarding out-of-order samples'
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment