Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision

Target

Select target project
  • ai3/float
  • micah/float
2 results
Select Git revision
Show changes
Commits on Source (14)
......@@ -35,7 +35,7 @@
- name: Generate ssh_known_hosts
copy:
dest: /etc/ssh/ssh_known_hosts
content: "@cert-authority {{ lookup('file', credentials_dir + '/ssh/key.pub') }}"
content: "@cert-authority *.{{ domain }} {{ lookup('file', credentials_dir + '/ssh/key.pub') }}"
# Copy the configuration files.
- name: Install SSH configuration (files)
......
......@@ -44,6 +44,7 @@
state: stopped
enabled: no
masked: yes
ignore_errors: true
- name: Remove old rsyslog-collector systemd unit
file:
......
......@@ -11,5 +11,8 @@ alertmanager_smtp_hello: "localhost"
alertmanager_smtp_auth_username: ""
alertmanager_smtp_auth_password: ""
# Base URL of your alert playbook website. When defined, firing-alert emails link to <url>/<alertname>.md.
#alertmanager_playbook_url: ""
# Custom blackbox probes.
prometheus_custom_blackbox_probes: {}
......@@ -55,7 +55,7 @@ groups:
redundancy ({{ $value }}) and may eventually be at risk.'
- alert: JobDown
expr: job:up:ratio < 0.51
expr: job:up:ratio < 0.5
for: 5m
labels:
severity: page
......@@ -65,7 +65,7 @@ groups:
description: 'Job {{ $labels.job }} is down globally (availability {{ $value }}).'
- alert: ProbeFailure
expr: target:probe_success:ratio{probe!="ping"} < 0.5
expr: target:probe_success:ratio{probe!="ping",probeset!="service"} < 0.5
for: 5m
labels:
severity: page
......@@ -76,7 +76,7 @@ groups:
for target {{ $labels.target }} (success ratio {{ $value }}).'
- alert: ProbeFailure
expr: probe:probe_success:ratio{probe!="ping"} < 0.5
expr: probe:probe_success:ratio{probe!="ping",probeset!="service"} < 0.5
for: 5m
labels:
severity: page
......
groups:
- name: roles/prometheus/files/rules/alerts_smart.conf
- name: roles/prometheus/files/rules/alerts_system_health.conf
rules:
- alert: DiskUnhealthy
expr: smartmon_device_smart_healthy < 1
......@@ -18,3 +18,13 @@ groups:
annotations:
summary: "RAID device {{ $labels.md_device }} on {{ $labels.host }} is unhealthy"
description: "The RAID device {{ $labels.md_device }} on {{ $labels.host }} is reporting a degraded state, which means that probably one or more of the disks in the array have failed."
- alert: HostPhysicalComponentTooHot
  # Page when a sensor stays above 95% of its own critical temperature.
  # The "> 0" filter drops sensors that report a zero (or negative) critical
  # threshold: dividing a positive temperature by zero yields +Inf in PromQL,
  # which would satisfy "> 0.95" and page for a sensor with no usable threshold.
  expr: node_hwmon_temp_celsius / (node_hwmon_temp_crit_celsius > 0) > 0.95
  for: 2h
  labels:
    severity: page
  annotations:
    summary: "A physical component is running too hot on {{ $labels.host }}"
    # $value is the current/critical temperature ratio, not a temperature.
    description: "A sensor is reporting that a physical component ({{ $labels.sensor }}/{{ $labels.chip }}) on {{ $labels.host }} has been running very close to its critical temperature (current/critical ratio: {{ $value }}) for the last 2 hours."
......@@ -11,17 +11,17 @@ groups:
# Sum prober metrics over the probers (hosts), producing
# an aggregation by target.
- record: target:probe_success:count
expr: count(probe_success) by (probe,zone,target)
expr: count(probe_success) by (probe,probeset,zone,target)
- record: target:probe_success:sum
expr: sum(probe_success) by (probe,zone,target)
expr: sum(probe_success) by (probe,probeset,zone,target)
- record: target:probe_success:ratio
expr: target:probe_success:sum / target:probe_success:count
# Sum prober metrics over targets, aggregating by probe.
- record: probe:probe_success:count
expr: count(probe_success) by (probe,zone)
expr: count(probe_success) by (probe,probeset,zone)
- record: probe:probe_success:sum
expr: sum(probe_success) by (probe,zone)
expr: sum(probe_success) by (probe,probeset,zone)
- record: probe:probe_success:ratio
expr: probe:probe_success:sum / probe:probe_success:count
......
......@@ -21,6 +21,14 @@
- files/rules/*.conf.yml
notify: "reload prometheus"
# Run the template module once per file matched by the fileglob below,
# writing each result into the /etc/prometheus/alertmanager_templates/ directory.
- name: Install alertmanager templates
template:
src: "{{ item }}"
dest: "/etc/prometheus/alertmanager_templates/"
with_fileglob:
- templates/alertmanager_templates/*
# NOTE(review): these are Alertmanager templates, but the handler notified is
# named "reload prometheus" - confirm that it also makes Alertmanager re-read them.
notify: "reload prometheus"
- name: Install Prometheus configs
template:
src: "{{ item }}.j2"
......
......@@ -20,7 +20,7 @@ route:
# Only severity=page alerts are ever sent anywhere.
routes:
- receiver: 'alert-email'
- receiver: alert
match:
severity: page
......@@ -56,15 +56,17 @@ inhibit_rules:
receivers:
- name: default
- name: 'alert-email'
- name: alert
{% if alert_email %}
email_configs:
- to: '{{ alert_email }}'
html: '{% raw %}{{ template "email.float.html" . }}{% endraw %}'
{% endif %}
{% for webhook in alert_webhook_receivers | default([]) %}
- name: "{{ webhook.name }}"
{% if alert_webhook_receivers | default([]) %}
webhook_configs:
{% for webhook in alert_webhook_receivers %}
- url: "{{ webhook.url }}"
send_resolved: {{ webhook.send_resolved | default(False) | to_json }}
{% endfor %}
{% endif %}
{# Defines the Go template "playbook_url.html", which the email template invokes
   for each firing alert. When alertmanager_playbook_url is defined it renders a
   "Playbook" link to <alertmanager_playbook_url>/<alertname>.md; otherwise the
   template is defined empty so that the call still resolves. The raw/endraw
   pairs keep the Go template braces from being interpreted by Jinja. #}
{% if alertmanager_playbook_url is defined %}
{% raw %}{{ define "playbook_url.html" }}{% endraw %}
<a href="{{ alertmanager_playbook_url }}/{% raw %}{{.Labels.alertname}}{% endraw %}.md">Playbook</a><br />
{% raw %}{{ end }}{% endraw %}
{% else %}
{% raw %}
{{ define "playbook_url.html" }}{{ end }}
{% endraw %}
{% endif %}
{% raw %}
{{ define "email.float.html" }}
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta name="viewport" content="width=device-width" />
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title>{{ template "__subject" . }}</title>
<style>
/* -------------------------------------
GLOBAL
A very basic CSS reset
------------------------------------- */
* {
margin: 0;
font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
box-sizing: border-box;
font-size: 14px;
}
img {
max-width: 100%;
}
body {
-webkit-font-smoothing: antialiased;
-webkit-text-size-adjust: none;
width: 100% !important;
height: 100%;
line-height: 1.6em;
/* 1.6em * 14px = 22.4px, use px to get airier line-height also in Thunderbird, and Yahoo!, Outlook.com, AOL webmail clients */
/*line-height: 22px;*/
}
/* Let's make sure all tables have defaults */
table td {
vertical-align: top;
}
/* -------------------------------------
BODY & CONTAINER
------------------------------------- */
body {
background-color: #f6f6f6;
}
.body-wrap {
background-color: #f6f6f6;
width: 100%;
}
.container {
display: block !important;
max-width: 600px !important;
margin: 0 auto !important;
/* makes it centered */
clear: both !important;
}
.content {
max-width: 600px;
margin: 0 auto;
display: block;
padding: 20px;
}
/* -------------------------------------
HEADER, FOOTER, MAIN
------------------------------------- */
.main {
background-color: #fff;
border: 1px solid #e9e9e9;
border-radius: 3px;
}
.content-wrap {
padding: 30px;
}
.content-block {
padding: 0 0 20px;
}
.header {
width: 100%;
margin-bottom: 20px;
}
.footer {
width: 100%;
clear: both;
color: #999;
padding: 20px;
}
.footer p, .footer a, .footer td {
color: #999;
font-size: 12px;
}
/* -------------------------------------
TYPOGRAPHY
------------------------------------- */
h1, h2, h3 {
font-family: "Helvetica Neue", Helvetica, Arial, "Lucida Grande", sans-serif;
color: #000;
margin: 40px 0 0;
line-height: 1.2em;
font-weight: 400;
}
h1 {
font-size: 32px;
font-weight: 500;
/* 1.2em * 32px = 38.4px, use px to get airier line-height also in Thunderbird, and Yahoo!, Outlook.com, AOL webmail clients */
/*line-height: 38px;*/
}
h2 {
font-size: 24px;
/* 1.2em * 24px = 28.8px, use px to get airier line-height also in Thunderbird, and Yahoo!, Outlook.com, AOL webmail clients */
/*line-height: 29px;*/
}
h3 {
font-size: 18px;
/* 1.2em * 18px = 21.6px, use px to get airier line-height also in Thunderbird, and Yahoo!, Outlook.com, AOL webmail clients */
/*line-height: 22px;*/
}
h4 {
font-size: 14px;
font-weight: 600;
}
p, ul, ol {
margin-bottom: 10px;
font-weight: normal;
}
p li, ul li, ol li {
margin-left: 5px;
list-style-position: inside;
}
/* -------------------------------------
LINKS & BUTTONS
------------------------------------- */
a {
color: #348eda;
text-decoration: underline;
}
.btn-primary {
text-decoration: none;
color: #FFF;
background-color: #348eda;
border: solid #348eda;
border-width: 10px 20px;
line-height: 2em;
/* 2em * 14px = 28px, use px to get airier line-height also in Thunderbird, and Yahoo!, Outlook.com, AOL webmail clients */
/*line-height: 28px;*/
font-weight: bold;
text-align: center;
cursor: pointer;
display: inline-block;
border-radius: 5px;
text-transform: capitalize;
}
/* -------------------------------------
OTHER STYLES THAT MIGHT BE USEFUL
------------------------------------- */
.last {
margin-bottom: 0;
}
.first {
margin-top: 0;
}
.aligncenter {
text-align: center;
}
.alignright {
text-align: right;
}
.alignleft {
text-align: left;
}
.clear {
clear: both;
}
/* -------------------------------------
ALERTS
Change the class depending on warning email, good email or bad email
------------------------------------- */
.alert {
font-size: 16px;
color: #fff;
font-weight: 500;
padding: 20px;
text-align: center;
border-radius: 3px 3px 0 0;
}
.alert a {
color: #fff;
text-decoration: none;
font-weight: 500;
font-size: 16px;
}
.alert.alert-warning {
background-color: #E6522C;
}
.alert.alert-bad {
background-color: #D0021B;
}
.alert.alert-good {
background-color: #68B90F;
}
/* -------------------------------------
INVOICE
Styles for the billing table
------------------------------------- */
.invoice {
margin: 40px auto;
text-align: left;
width: 80%;
}
.invoice td {
padding: 5px 0;
}
.invoice .invoice-items {
width: 100%;
}
.invoice .invoice-items td {
border-top: #eee 1px solid;
}
.invoice .invoice-items .total td {
border-top: 2px solid #333;
border-bottom: 2px solid #333;
font-weight: 700;
}
/* -------------------------------------
RESPONSIVE AND MOBILE FRIENDLY STYLES
------------------------------------- */
@media only screen and (max-width: 640px) {
body {
padding: 0 !important;
}
h1, h2, h3, h4 {
font-weight: 800 !important;
margin: 20px 0 5px !important;
}
h1 {
font-size: 22px !important;
}
h2 {
font-size: 18px !important;
}
h3 {
font-size: 16px !important;
}
.container {
padding: 0 !important;
width: 100% !important;
}
.content {
padding: 0 !important;
}
.content-wrap {
padding: 10px !important;
}
.invoice {
width: 100% !important;
}
}
</style>
</head>
<body itemscope itemtype="http://schema.org/EmailMessage">
<table class="body-wrap">
<tr>
<td></td>
<td class="container" width="600">
<div class="content">
<table class="main" width="100%" cellpadding="0" cellspacing="0">
<tr>
{{ if gt (len .Alerts.Firing) 0 }}
<td class="alert alert-warning">
{{ else }}
<td class="alert alert-good">
{{ end }}
{{ .Alerts | len }} alert{{ if gt (len .Alerts) 1 }}s{{ end }} for {{ range .GroupLabels.SortedPairs }}
{{ .Name }}={{ .Value }}
{{ end }}
</td>
</tr>
<tr>
<td class="content-wrap">
<table width="100%" cellpadding="0" cellspacing="0">
<tr>
<td class="content-block">
<a href='{{ template "__alertmanagerURL" . }}' class="btn-primary">View in {{ template "__alertmanager" . }}</a>
</td>
</tr>
{{ if gt (len .Alerts.Firing) 0 }}
<tr>
<td class="content-block">
<strong>[{{ .Alerts.Firing | len }}] Firing</strong>
</td>
</tr>
{{ end }}
{{ range .Alerts.Firing }}
<tr>
<td class="content-block">
<strong>Labels</strong><br />
{{ range .Labels.SortedPairs }}{{ .Name }} = {{ .Value }}<br />{{ end }}
{{ if gt (len .Annotations) 0 }}<strong>Annotations</strong><br />{{ end }}
{{ range .Annotations.SortedPairs }}{{ .Name }} = {{ .Value }}<br />{{ end }}
{{ template "playbook_url.html" . }}
<a href="{{ .GeneratorURL }}">Source</a><br />
</td>
</tr>
{{ end }}
{{ if gt (len .Alerts.Resolved) 0 }}
{{ if gt (len .Alerts.Firing) 0 }}
<tr>
<td class="content-block">
<br />
<hr />
<br />
</td>
</tr>
{{ end }}
<tr>
<td class="content-block">
<strong>[{{ .Alerts.Resolved | len }}] Resolved</strong>
</td>
</tr>
{{ end }}
{{ range .Alerts.Resolved }}
<tr>
<td class="content-block">
<strong>Labels</strong><br />
{{ range .Labels.SortedPairs }}{{ .Name }} = {{ .Value }}<br />{{ end }}
{{ if gt (len .Annotations) 0 }}<strong>Annotations</strong><br />{{ end }}
{{ range .Annotations.SortedPairs }}{{ .Name }} = {{ .Value }}<br />{{ end }}
<a href="{{ .GeneratorURL }}">Source</a><br />
</td>
</tr>
{{ end }}
</table>
</td>
</tr>
</table>
<div class="footer">
<table width="100%">
<tr>
<td class="aligncenter content-block"><a href='{{ .ExternalURL }}'>Sent by {{ template "__alertmanager" . }}</a></td>
</tr>
</table>
</div></div>
</td>
<td></td>
</tr>
</table>
</body>
</html>
{{ end }}
{% endraw %}
......@@ -124,6 +124,7 @@ scrape_configs:
labels:
zone: internal
probe: ping
probeset: base
- job_name: "prober_https_{{ prober_idx }}"
metrics_path: "/probe"
......@@ -150,7 +151,8 @@ scrape_configs:
{% endfor %}
labels:
zone: public
probe: https_up
probe: https
probeset: base
{% for custom_probe in prometheus_custom_blackbox_probes.get('http', []) %}
- job_name: "prober_https_{{ custom_probe.name }}_{{ prober_idx }}"
......@@ -178,7 +180,8 @@ scrape_configs:
{% endfor %}
labels:
zone: public
probe: https
probe: https_{{ custom_probe.name }}
probeset: custom
{% endfor %}
- job_name: "prober_dns_{{ prober_idx }}"
......@@ -206,6 +209,7 @@ scrape_configs:
labels:
zone: public
probe: dns
probeset: base
{% endfor %}
......@@ -216,6 +220,8 @@ scrape_configs:
metrics_path: "{{ target.metrics_path | default('/metrics') }}"
static_configs:
- targets: {{ target.targets | to_json }}
labels:
zone: external
relabel_configs:
- source_labels: [__address__]
target_label: host
......