diff --git a/roles/float-base/files/node-exporter-scripts/smartmon.py b/roles/float-base/files/node-exporter-scripts/smartmon.py index f27915da8f6b49aedfd6f7cb7ad7d0b874111741..7f32f77f67cd83892defaa1b36800763abd38f28 100644 --- a/roles/float-base/files/node-exporter-scripts/smartmon.py +++ b/roles/float-base/files/node-exporter-scripts/smartmon.py @@ -191,12 +191,26 @@ def smartd_devices(config='/etc/smartd.conf'): yield Device.from_string(line.strip()) +def _wrap_hours(power_on_hours, n): + # SMART self-test lifetime_hours parameter is a 16-bit field that + # wraps around. Try to put it back in the range of the current + # value of power_on_hours, although now we're just making up data + # and this will fail spectacularly in certain circumstances. + if power_on_hours < 65536: + return n + if n < (power_on_hours & 0xffff): + return (power_on_hours & 0xffff0000) + n + return n + + def collect_self_test_status(device, data): """Extract SMART self-test status from logs.""" if 'ata_smart_self_test_log' not in data or \ 'table' not in data['ata_smart_self_test_log']['standard']: return + power_on_hours = data['power_on_time']['hours'] + # Attempt to extract the most recent self test status by type. most_recent_test_by_type = {} for test in data['ata_smart_self_test_log']['standard']['table']: @@ -208,7 +222,8 @@ def collect_self_test_status(device, data): labels = {'test': to_label_value(test['type']['string'])} labels.update(device.labels) yield Gauge('self_test_status', labels, test['status']['passed']) - yield Gauge('self_test_hours', labels, test['lifetime_hours']) + yield Gauge('self_test_hours', labels, + _wrap_hours(power_on_hours, test['lifetime_hours'])) def collect_ata_attributes(device, data):