Skip to content
Snippets Groups Projects
Commit f2972b7b authored by ale
Browse files

Add another DiskUnhealthy trigger when disks fail SMART self-tests

parent 43bf82d2
Branches
No related tags found
No related merge requests found
Pipeline #82602 passed
...@@ -3,13 +3,22 @@ groups: ...@@ -3,13 +3,22 @@ groups:
rules: rules:
# NOTE(review): every line of this rule appears twice because this looks like a
# side-by-side diff rendering (old text first, then new text). The two sides
# differ only on the `for` line, where the hold duration goes from 2h to 1h.
- alert: DiskUnhealthy - alert: DiskUnhealthy
expr: smartmon_device_smart_healthy < 1 expr: smartmon_device_smart_healthy < 1
for: 2h for: 1h
labels: labels:
severity: warn severity: warn
annotations: annotations:
summary: "Disk {{ $labels.disk }} on {{ $labels.host }} is unhealthy" summary: "Disk {{ $labels.disk }} on {{ $labels.host }} is unhealthy"
description: "Disk {{ $labels.disk }} on {{ $labels.host }} is reporting unhealthy SMART status and might need to be replaced." description: "Disk {{ $labels.disk }} on {{ $labels.host }} is reporting unhealthy SMART status and might need to be replaced."
# Second trigger for the DiskUnhealthy alert: this one watches the SMART
# self-test metric instead of the overall SMART health metric.
- alert: DiskUnhealthy
# Matches series whose value is below 1.
# NOTE(review): presumably a value below 1 means the self-test failed; confirm
# against the exporter that produces smartmon_self_test_status.
expr: smartmon_self_test_status < 1
# The expression must stay true for one hour before the alert fires.
for: 1h
labels:
severity: warn
annotations:
# The templates below read the `disk`, `host` and `test` labels of the matching
# series. NOTE(review): assumes the metric carries all three labels; verify.
summary: "Disk {{ $labels.disk }} on {{ $labels.host }} failed its self-test"
description: "Disk {{ $labels.disk }} on {{ $labels.host }} failed its {{ $labels.test }} SMART self-test, and might need to be replaced."
- alert: RAIDDeviceUnhealthy - alert: RAIDDeviceUnhealthy
expr: node_md_degraded > 0 expr: node_md_degraded > 0
for: 15m for: 15m
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment