diff --git a/roles/float-infra-prometheus/templates/blackbox.yml.j2 b/roles/float-infra-prometheus/templates/blackbox.yml.j2
index 020533dcc46ed570fd9eb1c86392855e4dec90f9..190aea284979674293a16ff4011da07dbdd8bc39 100644
--- a/roles/float-infra-prometheus/templates/blackbox.yml.j2
+++ b/roles/float-infra-prometheus/templates/blackbox.yml.j2
@@ -5,12 +5,10 @@ modules:
     prober: http
     timeout: 5s
     http:
-      valid_status_codes: []
+      valid_status_codes: [200]
       method: GET
       fail_if_ssl: false
       fail_if_not_ssl: false
-      fail_if_body_not_matches_regexp:
-        - "OK"
       preferred_ip_protocol: ip4
       tls_config:
         ca_file: /etc/credentials/x509/prometheus/ca.pem
diff --git a/roles/float-infra-prometheus/templates/prometheus.yml.j2 b/roles/float-infra-prometheus/templates/prometheus.yml.j2
index 57cc9b062c120d7bc55e3a75699df0d45c23842e..7e1905fe6032811f9584d77d4025f04192f1b5cb 100644
--- a/roles/float-infra-prometheus/templates/prometheus.yml.j2
+++ b/roles/float-infra-prometheus/templates/prometheus.yml.j2
@@ -104,6 +104,43 @@ scrape_configs:
 {% for prober_host in services['prometheus'].hosts|sort %}
 {% set prober_idx = loop.index %}
 
+{# Create a blackbox health probe job for every monitoring endpoint of every
+   service, once per prober host. prober_idx is appended to the job name so
+   that the names stay unique when more than one prober host exists;
+   Prometheus refuses to load a config with duplicate job names. #}
+{% for service_name, service in services | dictsort %}
+  {%- for target_config in service.get('monitoring_endpoints', []) %}
+  - job_name: "prober_health_{{ service_name | replace('-', '_') }}_{{ loop.index }}_{{ prober_idx }}"
+    metrics_path: "/probe"
+    params:
+      module:
+        - http_health
+    relabel_configs:
+      - source_labels: [__address__]
+        target_label: host
+        regex: "([^.]*).*"
+        replacement: "${1}"
+      - source_labels: [__address__]
+        target_label: __param_target
+      - source_labels: [__param_target]
+        target_label: instance
+      - target_label: __address__
+        replacement: "{{ prober_host }}.prometheus.{{ domain }}:9115"
+      - target_label: prober_host
+        replacement: "{{ prober_host }}"
+    static_configs:
+      - targets:
+{% for host in service.hosts | sort %}
+          - "{{ target_config.get('scheme', 'http') }}://{{ host }}.{{ service_name }}.{{ domain }}:{{ target_config.port }}{{ target_config.get('healthcheck_path', '/metrics') }}"
+{% endfor %}
+        labels:
+          zone: internal
+          probe: health
+          probeset: health
+          prober_float_service: prometheus
+  {%- endfor %}
+{% endfor %}
+
   - job_name: "prober_ping_{{ loop.index }}"
     metrics_path: "/probe"
     params: