Commit d5c5e226 authored by ale

NGINX error metrics should only include 5xx errors

Previously we were also counting 4xx errors, which are not relevant
as a reliability indicator. We may eventually want to restrict this
further to 502/503 (for backend timeouts only).
parent e85ea508
Pipeline #3368 passed with stage
in 5 minutes and 6 seconds
......@@ -10,7 +10,7 @@ groups:
severity: page
annotations:
summary: 'High HTTP error ratio for {{$labels.vhost}} globally'
description: 'We are serving lots of 4xx/5xx errors for {{$labels.vhost}} on all frontends.'
description: 'We are serving lots of 5xx errors for {{$labels.vhost}} on all frontends.'
- alert: HTTPErrorRatioHigh
expr: (instance:nginx_http_requests_errs:ratio > 0.02 and instance:nginx_http_requests_total:rate5m > 0.1)
for: 10m
......@@ -20,4 +20,4 @@ groups:
severity: page
annotations:
summary: 'High HTTP error ratio for {{$labels.vhost}} on {{$labels.host}}'
description: 'We are serving lots of 4xx/5xx errors for {{$labels.vhost}} on {{$labels.host}}.'
description: 'We are serving lots of 5xx errors for {{$labels.vhost}} on {{$labels.host}}.'
......@@ -4,14 +4,14 @@ groups:
- record: instance:nginx_http_requests_total:rate5m
expr: sum(rate(nginx_http_requests[5m])) without (backend, method, code)
- record: instance:nginx_http_requests_errs:rate5m
expr: sum(rate(nginx_http_requests{code=~"[45].*"}[5m])) without (backend, method, code)
expr: sum(rate(nginx_http_requests{code=~"5.*"}[5m])) without (backend, method, code)
- record: instance:nginx_http_requests_errs:ratio
expr: (instance:nginx_http_requests_errs:rate5m / instance:nginx_http_requests_total:rate5m)
- record: global:nginx_http_requests_total:rate5m
expr: sum(rate(nginx_http_requests[5m])) without (instance, host, exported_instance,
exported_host, backend, method, code)
- record: global:nginx_http_requests_errs:rate5m
expr: sum(rate(nginx_http_requests{code=~"[45].*"}[5m])) without (instance, host,
expr: sum(rate(nginx_http_requests{code=~"5.*"}[5m])) without (instance, host,
exported_instance, exported_host, backend, method, code)
- record: global:nginx_http_requests_errs:ratio
expr: (global:nginx_http_requests_errs:rate5m / global:nginx_http_requests_total:rate5m)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment