From c32336a6fd09eb21b551333b44abb93ed41c8872 Mon Sep 17 00:00:00 2001
From: ale <ale@incal.net>
Date: Thu, 24 Apr 2025 10:17:47 +0200
Subject: [PATCH] Conflate "deferred" and "active" Postfix queues for alerting

---
 .../templates/rules/alerts_postfix.conf.yml               | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/roles/ai3-prometheus/templates/rules/alerts_postfix.conf.yml b/roles/ai3-prometheus/templates/rules/alerts_postfix.conf.yml
index 38605a79..24a128b7 100644
--- a/roles/ai3-prometheus/templates/rules/alerts_postfix.conf.yml
+++ b/roles/ai3-prometheus/templates/rules/alerts_postfix.conf.yml
@@ -11,7 +11,7 @@ groups:
       # The postfix-out instances should be allowed to have a large
       # deferred queue for outbound messages.
       - alert: PostfixQueueTooLarge
-        expr: postfix_queue_length{postfix_instance="postfix-out",queue="deferred"} > 5000
+        expr: postfix_queue_length{postfix_instance="postfix-out",queue=~"(deferred|active)"} > 5000
         for: 10m
         labels:
           severity: warn
@@ -25,7 +25,7 @@ groups:
             deliveries.
           runbook: '[[ alert_runbook_fmt | format("PostfixQueueTooLarge") ]]'
       - alert: PostfixQueueTooLarge
-        expr: sum(postfix_queue_length{postfix_instance="postfix-out",queue="deferred"}) > 10000
+        expr: sum(postfix_queue_length{postfix_instance="postfix-out",queue=~"(deferred|active)"}) > 10000
         for: 10m
         labels:
           severity: page
@@ -71,7 +71,7 @@ groups:
       # all. Note the longer timeout: it is fine for queues like
       # 'active' or 'incoming' to accomodate temporary spikes.
       - alert: PostfixUnexpectedQueueTooLarge
-        expr: postfix_queue_length{queue!="deferred"} > 50
+        expr: postfix_queue_length{queue!~"(deferred|active)"} > 50
         for: 1h
         labels:
           severity: page
@@ -85,7 +85,7 @@ groups:
             service malfunctioning, or having capacity issues.
           runbook: '[[ alert_runbook_fmt | format("PostfixQueueTooLarge") ]]'
       - alert: PostfixUnexpectedQueueTooLarge
-        expr: sum(postfix_queue_length{queue!="deferred"}) by (postfix_instance, queue) > 100
+        expr: sum(postfix_queue_length{queue!~"(deferred|active)"}) by (postfix_instance, queue) > 100
         for: 1h
         labels:
           severity: page
-- 
GitLab