# alerts_auth.conf.yml
# Alerting rules for authentication (login) monitoring.
groups:
  - name: roles/ai3-prometheus/files/rules/alerts_auth.conf
    rules:

      # Alert if the error ratio of authentications becomes too high.
      #
      # The smtp service is excluded because constant brute-forcing
      # attempts against it produce just too many failures.
      #
      # Services that are excluded here should be added to NoLogins.
      - alert: TooManyLoginFailures
        # Fires when the ok-ratio series is below 0.2 AND the series with
        # the same labels in delta10m is above 5. Both series come from
        # recording rules that are not defined in this file; the second
        # clause presumably suppresses noise from low-traffic services
        # -- TODO confirm against the recording rules.
        # The folded scalar (>-) joins the lines with single spaces.
        expr: >-
          service:auth_requests_ok:ratio{exported_service!="smtp"} < 0.2
          and
          service:auth_requests_ok:delta10m{exported_service!="smtp"} > 5
        # The condition must hold continuously for 10 minutes before firing.
        for: 10m
        labels:
          severity: page
          scope: global
        annotations:
          summary: 'Too many login failures for service {{$labels.exported_service}}'
          description: 'The percentage of successful authentications on auth-server with service={{$labels.exported_service}} is too low ({{$value}}). This can indicate a brute-forcing attack (depending on the service), or a failure in the auth-server itself.'
          # NOTE(review): '[[ ... ]]' is not Prometheus template syntax; it is
          # presumably expanded by the deployment tooling before Prometheus
          # loads this file -- confirm.
          runbook: '[[ alert_playbook_url ]]/TooManyLoginFailures'

      # We also want to check that, for some important services, there
      # actually are successful logins. The threshold is low on purpose:
      # we're just trying to figure out whether the service is completely
      # broken or not.
      #
      # Services selected by the label matcher here should be excluded
      # from the TooManyLoginFailures alert.
      #
      # NOTE(review): a comparison only returns results for series that
      # exist. If the delta10m series for the service is missing entirely,
      # this alert stays silent -- consider whether an absent() check is
      # needed alongside it.
      - alert: NoLogins
        expr: service:auth_requests_ok:delta10m{exported_service="smtp"} < 3
        # The condition must hold continuously for 10 minutes before firing.
        for: 10m
        labels:
          severity: page
          scope: global
        annotations:
          summary: 'No successful logins for service {{$labels.exported_service}}'
          description: 'The auth-server is not reporting successful logins with service={{$labels.exported_service}}. This might indicate something broken with the auth-server itself.'
          # NOTE(review): '[[ ... ]]' is not Prometheus template syntax; it is
          # presumably expanded by the deployment tooling before Prometheus
          # loads this file -- confirm.
          runbook: '[[ alert_playbook_url ]]/NoLogins'