Skip to content
Snippets Groups Projects
Commit d9f6e97d authored by ale's avatar ale
Browse files

Add hark alerts

parent cf8a96f8
No related branches found
No related tags found
No related merge requests found
......@@ -17,7 +17,7 @@ groups:
annotations:
summary: 'Too many login failures for service {{$labels.exported_service}}'
description: 'The percentage of successful authentications on auth-server with service={{$labels.exported_service}} is too low ({{$value}}). This can indicate a brute-forcing attack (depending on the service), or a failure in the auth-server itself.'
runbook: '[[ alert_playbook_url ]]/TooManyLoginFailures.md'
runbook: '[[ alert_playbook_url ]]/TooManyLoginFailures'
# We also want to check that, for some important services, there
# actually are any successful logins. The threshold is low on
......@@ -35,5 +35,5 @@ groups:
annotations:
summary: 'No successful logins for service {{$labels.exported_service}}'
description: 'The auth-server is not reporting successful logins with service={{$labels.exported_service}}. This might indicate something broken with the auth-server itself.'
runbook: '[[ alert_playbook_url ]]/NoLogins.md'
runbook: '[[ alert_playbook_url ]]/NoLogins'
......@@ -29,7 +29,7 @@ groups:
annotations:
summary: "Postfix is deferring many messages on {{$labels.postfix_instance}}"
description: "The Postfix instance {{$labels.postfix_instance}} is unexpectedly deferring lots of messages. Perhaps some of the expected destinations are unreachable."
runbook: "[[ alert_playbook_url ]]/PostfixHighDeferred.md"
runbook: "[[ alert_playbook_url ]]/PostfixHighDeferred"
- alert: PostfixHighBounces
expr: 'instance:smtp_bounces:rate10m{postfix_instance!="postfix-out"} > 2'
......@@ -40,7 +40,7 @@ groups:
annotations:
summary: "Postfix is bouncing many messages on {{$labels.postfix_instance}}"
description: "The Postfix instance {{$labels.postfix_instance}} is unexpectedly bouncing lots of messages. Perhaps there are issues with the user databases."
runbook: "[[ alert_playbook_url ]]/PostfixHighBounces.md"
runbook: "[[ alert_playbook_url ]]/PostfixHighBounces"
# Warn if there are many rejects on an instance: something may (or may not) be broken.
- alert: PostfixHighRejects
......@@ -63,7 +63,7 @@ groups:
annotations:
summary: "Postfix is rejecting many messages on {{$labels.postfix_instance}}"
description: "The Postfix instance {{$labels.postfix_instance}} is rejecting a high number of messages. Could be a spam attack, or something broken in the address delivery (if the failing deliveries are for real emails), which is often a DNS problem."
runbook: "[[ alert_playbook_url ]]/PostfixHighRejects.md"
runbook: "[[ alert_playbook_url ]]/PostfixHighRejects"
# Page if there are multiple Postfix instances with high rejects: stuff is really broken.
- alert: PostfixHighRejects
......@@ -75,7 +75,7 @@ groups:
annotations:
summary: "Postfix is rejecting many messages"
description: "Multiple Postfix instances are rejecting a high number of messages. There is most likely something wrong with our internal email delivery."
runbook: "[[ alert_playbook_url ]]/PostfixHighRejects.md"
runbook: "[[ alert_playbook_url ]]/PostfixHighRejects"
# Alert when many emails are ratelimited (likely an account compromise)
- alert: PostfixHighRatelimit
......
# Security alerts driven by the 'hark' canary listener.
groups:
  - name: roles/ai3-prometheus/files/rules/alerts_security.conf
    rules:
      # Page as soon as any host reports a change in its
      # unexpected_connections metric over the last minute.
      #
      # The "by (host)" modifier belongs to the sum() aggregation, so it
      # must follow sum(...)'s closing parenthesis: delta() is a function,
      # not an aggregation operator, and cannot take a grouping clause.
      #
      # NOTE(review): if unexpected_connections is a counter rather than a
      # gauge, increase() would be the counter-safe choice -- confirm the
      # metric type exported by hark.
      - alert: UnexpectedConnections
        expr: 'sum(delta(unexpected_connections{job="hark"}[1m])) by (host) > 0'
        for: 1m
        labels:
          severity: page
          scope: host
        annotations:
          runbook: '[[ alert_playbook_url ]]/UnexpectedConnections'
          summary: '[SECURITY] Unexpected connections to {{$labels.host}}'
          description: |
            The 'hark' canary listener has detected unexpected connections
            on host {{$labels.host}}, this could be a sign of an intruder
            running a port scan on the internal network.
......@@ -13,7 +13,7 @@ groups:
description: 'Probe {{ $labels.probe }} ({{ $labels.zone }}) is failing
for target {{ $labels.target }} (success ratio {{ $value }}). Check
https://service-prober.autistici.org/ for the error details.'
runbook: '[[ alert_playbook_url ]]/ProbeFailure.md'
runbook: '[[ alert_playbook_url ]]/ProbeFailure'
- alert: ProbeFailure
expr: probe:probe_success:ratio{probeset="service"} < 0.5
......@@ -26,5 +26,5 @@ groups:
description: 'Probe {{ $labels.probe }} ({{ $labels.zone }}) is failing
globally (success ratio {{ $value }}). Check
https://service-prober.autistici.org/ for the error details.'
runbook: '[[ alert_playbook_url ]]/ProbeFailure.md'
runbook: '[[ alert_playbook_url ]]/ProbeFailure'
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment