From 5506e931bfa550f2d47f170a037fc22274fcdcc7 Mon Sep 17 00:00:00 2001
From: ale <ale@incal.net>
Date: Tue, 17 Dec 2024 15:41:42 +0000
Subject: [PATCH] Account for 16-bit overflow in SMART self-test lifetime_hours
 parameter

---
 .../files/node-exporter-scripts/smartmon.py     | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/roles/float-base/files/node-exporter-scripts/smartmon.py b/roles/float-base/files/node-exporter-scripts/smartmon.py
index f27915da..7f32f77f 100644
--- a/roles/float-base/files/node-exporter-scripts/smartmon.py
+++ b/roles/float-base/files/node-exporter-scripts/smartmon.py
@@ -191,12 +191,26 @@ def smartd_devices(config='/etc/smartd.conf'):
                 yield Device.from_string(line.strip())
 
 
+def _wrap_hours(power_on_hours, n):
+    # SMART self-test lifetime_hours is a 16-bit field that wraps
+    # around: rebuild it as the latest hour count <= power_on_hours
+    # whose low 16 bits equal n. This is still a guess, and is wrong
+    # for tests that ran more than 65536 hours before now.
+    if power_on_hours < 65536:
+        return n
+    if n <= (power_on_hours & 0xffff):
+        return (power_on_hours & ~0xffff) + n
+    return (power_on_hours & ~0xffff) - 0x10000 + n
+
+
 def collect_self_test_status(device, data):
     """Extract SMART self-test status from logs."""
     if 'ata_smart_self_test_log' not in data or \
        'table' not in data['ata_smart_self_test_log']['standard']:
         return
 
+    power_on_hours = data['power_on_time']['hours']
+
     # Attempt to extract the most recent self test status by type.
     most_recent_test_by_type = {}
     for test in data['ata_smart_self_test_log']['standard']['table']:
@@ -208,7 +222,8 @@ def collect_self_test_status(device, data):
         labels = {'test': to_label_value(test['type']['string'])}
         labels.update(device.labels)
         yield Gauge('self_test_status', labels, test['status']['passed'])
-        yield Gauge('self_test_hours', labels, test['lifetime_hours'])
+        yield Gauge('self_test_hours', labels,
+                    _wrap_hours(power_on_hours, test['lifetime_hours']))
 
 
 def collect_ata_attributes(device, data):
-- 
GitLab