Skip to content
Snippets Groups Projects
Commit f2972b7b authored by ale's avatar ale
Browse files

Add another DiskUnhealthy trigger when disks fail SMART self-tests

parent 43bf82d2
No related merge requests found
Pipeline #82602 passed
......@@ -3,13 +3,22 @@ groups:
rules:
# DiskUnhealthy (overall SMART health): fires when
# smartmon_device_smart_healthy stays below 1 for a full hour.
# NOTE(review): presumably the metric is 1 for a healthy disk; confirm
# against the exporter that produces it.
- alert: DiskUnhealthy
  expr: smartmon_device_smart_healthy < 1
  # This rule previously listed two `for` keys (2h, then 1h). Duplicate
  # mapping keys are invalid YAML, so only the later value, 1h, is kept; it
  # also matches the hold time of the self-test DiskUnhealthy rule.
  for: 1h
  labels:
    severity: warn
  annotations:
    summary: "Disk {{ $labels.disk }} on {{ $labels.host }} is unhealthy"
    description: "Disk {{ $labels.disk }} on {{ $labels.host }} is reporting unhealthy SMART status and might need to be replaced."
# DiskUnhealthy (SMART self-test): second trigger sharing the DiskUnhealthy
# alert name. Fires when smartmon_self_test_status stays below 1 for a full
# hour.
# NOTE(review): presumably the metric is 1 for a passed self-test and carries
# a `test` label naming the test type; confirm against the exporter.
- alert: DiskUnhealthy
  expr: smartmon_self_test_status < 1
  for: 1h
  labels:
    severity: warn
  annotations:
    summary: "Disk {{ $labels.disk }} on {{ $labels.host }} failed its self-test"
    # Folded scalar: the two lines below are joined with a single space and
    # no trailing newline, yielding one single-line message.
    description: >-
      Disk {{ $labels.disk }} on {{ $labels.host }} failed its
      {{ $labels.test }} SMART self-test, and might need to be replaced.
- alert: RAIDDeviceUnhealthy
expr: node_md_degraded > 0
for: 15m
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment