Commit b971e459 authored by ale's avatar ale
Browse files

Merge branch 'loki' of 0xacab.org:micah/float into loki

parents dab15c51 086a83e9
Pipeline #7215 passed with stage
in 2 minutes and 57 seconds
......@@ -89,6 +89,21 @@ the public-facing DNS zones, if unset it defaults to `ip`
`groups` (optional) is a list of groups that this host should be a
member of.
`resolver_mode` (optional) controls the desired state of the host's
*resolv.conf* file. The supported values are:
* *ignore* - do nothing and leave resolv.conf alone
* *localhost* - use localhost as a resolver (presumably some other
role will have installed a DNS cache there)
* *internal:NET* - use the frontend hosts as resolvers, over the
specified overlay network named NET
* *external* - use Google Public DNS.
Note that due to ordering issues it is advised to set the *resolver_mode*
attribute on hosts only after the first setup is complete, to avoid
breaking DNS resolution while Ansible is running.
## Example
An example of a valid inventory file (for a hypothetical Vagrant
......@@ -480,6 +495,12 @@ attribute.
*master_election* attribute is also true, the backup job will only be
run on the master host for the service.
`sharded`: When this attribute is true, the dataset is considered a
sharded (partitioned) dataset, so float will **not** automatically
attempt to restore it on new servers: the idea is that for sharded
datasets, the application layer is responsible for data management.
This attribute is false by default.
`owner`: For filesystem paths, the user that will own the files upon
restore.
......
......@@ -156,6 +156,7 @@ def command_run(config, playbooks,
ansible_check=False,
ansible_diff=False,
ansible_stdout=None,
ansible_limit=None,
ansible_extra_vars=[]):
if not os.path.exists(config):
raise Exception(
......@@ -185,6 +186,8 @@ def command_run(config, playbooks,
cmd.append('--check')
if ansible_diff:
cmd.append('--diff')
if ansible_limit:
cmd.extend(['--limit', ansible_limit])
for v in ansible_extra_vars:
cmd.append('--extra-vars=' + v)
cmd.append(arg)
......@@ -401,6 +404,9 @@ themselves.
'-e', '--extra-vars', dest='ansible_extra_vars',
action='append', default=[],
help='Extra variables for Ansible')
run_parser.add_argument(
'-l', '--limit', dest='ansible_limit',
help='Limit to selected hosts')
kwargs = vars(parser.parse_args())
cmd = kwargs.pop('subparser')
......
......@@ -3,7 +3,7 @@
- hosts: all
roles:
- base
- credentials
- float-credentials
- vagrant-compat
- hosts: net-overlay
......
......@@ -358,7 +358,6 @@ def _host_vars(name, inventory, services, assignments):
'float_disabled_services': [],
'float_enabled_containers': [],
'float_host_service_credentials': [],
'float_host_service_credentials_certs': [],
'float_host_overlay_networks': _host_net_overlays(name, inventory),
'float_host_dns_map': _host_service_dns_map(
name, inventory, services, assignments),
......@@ -366,8 +365,7 @@ def _host_vars(name, inventory, services, assignments):
# Add default client credentials that are present on all hosts.
for c in DEFAULT_SERVICE_CREDENTIALS:
hv['float_host_service_credentials'].append({'credentials': c})
hv['float_host_service_credentials_certs'].append({
hv['float_host_service_credentials'].append({
'credentials': c, 'service': 'LOCAL',
'mode': 'client', 'x509_params': {}})
......@@ -390,15 +388,13 @@ def _host_vars(name, inventory, services, assignments):
for u in services[s].get('systemd_services', []):
enabled_systemd_units.add(u)
for c in services[s].get('service_credentials', []):
hv['float_host_service_credentials'].append({
'service': s, 'credentials': c})
if c.get('enable_server', True):
params = _service_credential_params(name, s, inventory, assignments)
hv['float_host_service_credentials_certs'].append({
hv['float_host_service_credentials'].append({
'credentials': c, 'service': s,
'mode': 'server', 'x509_params': params})
if c.get('enable_client', True):
hv['float_host_service_credentials_certs'].append({
hv['float_host_service_credentials'].append({
'credentials': c, 'service': s,
'mode': 'client', 'x509_params': {}})
else:
......
......@@ -21,3 +21,11 @@ emergency_ssh_key: ""
# The Debian distribution that we are using as the basis.
float_debian_dist: "buster"
# How to configure resolv.conf, one of the following options:
# 'ignore' - do nothing and leave resolv.conf alone
# 'localhost' - use localhost as the name server (presumably a cache)
# 'internal:NET' - use the frontend hosts as resolvers, over the
# specified overlay network NET
# 'external' - use Google Public DNS.
resolver_mode: "ignore"
......@@ -6,9 +6,12 @@ import datetime
import decimal
import itertools
import re
import os
import shlex
import subprocess
GUARD_FILE = '/etc/smartmon.disable'
device_info_re = re.compile(r'^(?P<k>[^:]+?)(?:(?:\sis|):)\s*(?P<v>.*)$')
ata_error_count_re = re.compile(
......@@ -386,6 +389,10 @@ def collect_disks_smart_metrics():
def main():
# Guard file to stop this automation in an emergency.
if os.path.exists(GUARD_FILE):
sys.exit(0)
version_metric = Metric('smartctl_version', {
'version': smart_ctl_version()
}, True)
......
......@@ -11,7 +11,10 @@
dataset_owner: "{{ item.1.get('owner', '') }}"
dataset_path: "{{ item.1.get('path', '') }}"
dataset_type: "{% if 'backup_command' in item.1 %}pipe{% else %}file{% endif %}"
dataset_should_backup: "{{ item.0.name in float_enabled_services and ((not item.1.get('on_master_only', False)) or (item.0.get('master_host') == inventory_hostname)) }}"
dataset_should_backup: "{{ (item.0.name in float_enabled_services) and ((not item.1.get('on_master_only', False)) or (item.0.get('master_host') == inventory_hostname)) }}"
- set_fact:
dataset_should_restore: "{{ dataset_should_backup and not item.1.get('sharded', False) }}"
- name: Set up configuration for dataset {{ dataset.name }} (source)
template:
......@@ -47,20 +50,28 @@
src: "tabacco/restore-script.j2"
dest: "/usr/lib/float/datasets/restore-{{ dataset_filename }}"
mode: 0755
when: dataset_should_backup
when: dataset_should_restore
- name: Create restore service unit
template:
src: "tabacco/restore-service.j2"
dest: "/etc/systemd/system/restore-{{ dataset_filename }}.service"
mode: 0444
when: dataset_should_backup
when: dataset_should_restore
- name: Cleanup restore service unit
file:
path: "/etc/systemd/system/restore-{{ dataset_filename }}.service"
state: absent
when: "not dataset_should_restore"
notify: reload systemd
- name: Enable restore service unit
systemd:
name: "restore-{{ dataset_filename }}.service"
enabled: yes
when: dataset_should_backup
when: dataset_should_restore
notify: reload systemd
- name: Wipe dataset restore guard file
file:
......
......@@ -35,6 +35,6 @@
- name: Configure systemd
copy:
src: "system.conf"
dest: "/etc/systemd/system.dconf"
dest: "/etc/systemd/system.conf"
notify: reload systemd
......@@ -49,6 +49,7 @@
packages:
- auditd
- audisp-json
when: not enable_loki
- name: Auditd default config removed
file:
......@@ -63,6 +64,7 @@
- "templates/audit/rules.d/*.j2"
notify:
- restart auditd
when: not enable_loki
- name: Auditd configured
template:
......@@ -70,6 +72,7 @@
dest: /etc/audit/auditd.conf
notify:
- restart auditd
when: not enable_loki
- name: Audispd plugins configured
copy:
......@@ -80,11 +83,13 @@
- json.conf
notify:
- restart auditd
when: not enable_loki
- name: Enable auditd service
systemd:
name: auditd.service
enabled: yes
when: not enable_loki
- name: Disable journald-auditd link
systemd:
......@@ -92,3 +97,4 @@
state: stopped
enabled: no
masked: yes
when: not enable_loki
......@@ -86,3 +86,10 @@
- import_tasks: ipmi.yml
when: ipmi_device.stat.exists == true
- name: Configure resolv.conf
template:
src: "resolv.conf.j2"
dest: "/etc/resolv.conf"
when: "resolver_mode != 'ignore'"
{% if resolver_mode == 'localhost' %}
nameserver 127.0.0.1
options edns0
{% elif resolver_mode.startswith('internal:') %}
{% set dns_overlay_net = resolver_mode[9:] %}
{% for h in groups['frontend'] | sort %}
nameserver {{ hostvars[h]['ip_' + dns_overlay_net] }}
{% endfor %}
options edns0 rotate
{% else %}
nameserver 8.8.8.8
nameserver 8.8.4.4
options edns0
{% endif %}
......@@ -22,6 +22,11 @@ ruleset(name="incoming") {
# Send a copy of everything to mtail.
action(type="ompipe"
Pipe="/run/mtail.fifo"
# Protect the main queue from mtail pipe full: discard messages on
# ompipe action queue full.
queue.type="FixedArray"
queue.size="4096"
queue.timeoutEnqueue="0"
action.resumeRetryCount="-1"
action.resumeInterval="2"
action.resumeIntervalMax="30")
......
......@@ -56,12 +56,16 @@ fs.suid_dumpable=0
# Disable source routed packet acceptance.
net.ipv4.conf.all.accept_source_route=0
net.ipv4.conf.default.accept_source_route=0
net.ipv6.conf.all.accept_source_route=0
net.ipv6.conf.default.accept_source_route=0
# Do not accept ICMP redirects.
net.ipv4.conf.all.accept_redirects=0
net.ipv4.conf.default.accept_redirects=0
net.ipv4.conf.all.secure_redirects=0
net.ipv4.conf.default.secure_redirects=0
net.ipv6.conf.all.accept_redirects=0
net.ipv6.conf.default.accept_redirects=0
# Do not send ICMP redirects.
net.ipv4.conf.all.send_redirects=0
......@@ -72,3 +76,54 @@ net.ipv4.icmp_ignore_bogus_error_responses=1
# Enable RFC-recommended source route validation.
net.ipv4.conf.all.rp_filter=1
net.ipv4.conf.default.rp_filter=1
# Prevent the automatic loading of line disciplines
# https://lore.kernel.org/patchwork/patch/1034150
dev.tty.ldisc_autoload=0
# Additional protections for fifos, hardlinks, regular files, and symlinks
# https://patchwork.kernel.org/patch/10244781
# slightly tightened up from the systemd default values of "1" for each
fs.protected_fifos=2
fs.protected_hardlinks=1
fs.protected_regular=2
fs.protected_symlinks=1
# Disable the kexec system call (can be used to replace the running kernel)
# https://lwn.net/Articles/580269
# (linux-hardened default)
kernel.kexec_load_disabled=1
# Impose restrictions on exposing kernel pointers
# https://lwn.net/Articles/420403
# (linux-hardened default)
kernel.kptr_restrict=2
# Restrict use of the performance events system by unprivileged users
# https://lwn.net/Articles/696216
# (linux-hardened default)
kernel.perf_event_paranoid=3
# Disable the "magic sysrq key" functionality
# https://security.stackexchange.com/questions/138658
# https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1861238
kernel.sysrq=0
# Harden the BPF JIT compiler and restrict unprivileged use of BPF
# https://www.zerodayinitiative.com/advisories/ZDI-20-350
# https://lwn.net/Articles/660331
# (linux-hardened default)
net.core.bpf_jit_harden=2
kernel.unprivileged_bpf_disabled=1
# Disable unprivileged user namespaces
# https://lwn.net/Articles/673597
# (linux-hardened default)
#kernel.unprivileged_userns_clone=0
# Enable yama ptrace restrictions
# https://www.kernel.org/doc/Documentation/security/Yama.txt
# (linux-hardened default)
# set to "3" if the use of ptrace is not needed
kernel.yama.ptrace_scope=1
......@@ -3,7 +3,24 @@ credentials
Ansible role that installs all the [service
credentials](../docs/service_mesh.md#mutual-service-authentication) on
the hosts where they're needed.
the hosts where they're needed. This role works in combination with
the 'x509' action plugin.
Private keys never leave the target host: we create a CSR there and
sign it on the Ansible host.
X509 credentials are stored in /etc/credentials/x509 under directories
named after the services. Every service directory contains a copy of
the public CA certificate, so it can be bind-mounted in a container
easily.
Private keys have mode 640, are owned by root and by a dedicated group
named *service*-credentials. When the service is actually installed,
later, maybe by an Ansible role, it can add the service user to this
group.
Use by including this role and setting the *credentials* variable to a
list of entries specifying the desired credentials. This is already
done once system-wide by the *float-credentials* role with the
credentials automagically derived from the service definitions by
*float*.
---
#- set_fact:
# x509_params: "{{ float_service_credentials_params[service_name_item + '-' + credentials.name] | default({}) }}"
# when: "service_name_item is defined"
- name: Set up internal PKI credentials
block:
- file:
path: "/etc/credentials/x509/{{ item.credentials.name }}/{{ item.mode }}"
state: directory
loop: "{{ float_host_service_credentials_certs }}"
- name: "Check the internal PKI certificates"
x509_csr:
credentials_name: "{{ item.credentials.name }}"
domain: "{{ domain }}"
mode: "{{ item.mode }}"
params: "{{ item.x509_params|default({}) }}"
private_key_path: "/etc/credentials/x509/{{ item.credentials.name }}/{{ item.mode }}/private_key.pem"
cert_path: "/etc/credentials/x509/{{ item.credentials.name }}/{{ item.mode }}/cert.pem"
ca_cert_path: "/etc/credentials/x509/{{ item.credentials.name }}/ca.pem"
check: true
loop: "{{ float_host_service_credentials_certs }}"
check_mode: no
register: x509_should_update
# TODO: set the right permissions (credentials.name-credentials)
- name: "Create internal PKI CSRs"
x509_csr:
credentials_name: "{{ item.0.credentials.name }}"
domain: "{{ domain }}"
mode: "{{ item.0.mode }}"
params: "{{ item.0.x509_params|default({}) }}"
private_key_path: "/etc/credentials/x509/{{ item.0.credentials.name }}/{{ item.0.mode }}/private_key.pem"
check: false
when: "item.1.changed"
loop: "{{ float_host_service_credentials_certs | zip(x509_should_update.results) | list }}"
register: x509_csr
- name: "Sign internal PKI certificates"
x509_sign:
csr: "{{ item.1.csr }}"
mode: "{{ item.0.mode }}"
ca_cert_path: "{{ credentials_dir }}/x509/ca.pem"
ca_key_path: "{{ credentials_dir }}/x509/ca_private_key.pem"
when: "item.1.changed"
loop: "{{ float_host_service_credentials_certs | zip(x509_csr.results) | list }}"
register: x509_sign
- name: "Install the signed internal PKI certificates"
copy:
dest: "/etc/credentials/x509/{{ item.0.credentials.name }}/{{ item.0.mode }}/cert.pem"
content: "{{ item.1.cert }}"
mode: 0644
when: "item.1.changed"
loop: "{{ float_host_service_credentials_certs | zip(x509_sign.results) | list }}"
- name: "Set permissions on the private keys"
file:
path: "/etc/credentials/x509/{{ item.credentials.name }}/{{ item.mode }}/private_key.pem"
group: "{{ item.credentials.name }}-credentials"
mode: 0640
loop: "{{ float_host_service_credentials_certs }}"
rescue:
- debug:
msg: "Failed to set up one or more credentials"
---
- name: "Create service credentials group"
group:
name: "{{ item.credentials.name }}-credentials"
system: true
loop: "{{ float_host_service_credentials }}"
- name: "Create service credentials dirs"
file:
path: "/etc/credentials/x509/{{ item.credentials.name }}"
state: directory
loop: "{{ float_host_service_credentials }}"
- name: Copy CA
copy:
src: "{{ credentials_dir }}/x509/ca.pem"
dest: "/etc/credentials/x509/{{ item.credentials.name }}/ca.pem"
owner: root
group: root
mode: 0644
loop: "{{ float_host_service_credentials }}"
---
# Distribute the SSO public key to all hosts.
# This package needs to be on hosts in order to generate the CSRs.
- name: Install x509ca package
apt:
name: x509ca
state: present
# Get the credential names from the list of certs.
- set_fact:
credentials_names: "{{ credentials | map(attribute='credentials') | map(attribute='name') | unique | list }}"
- file:
path: /etc/sso
- name: "Create service credentials group"
group:
name: "{{ item }}-credentials"
system: true
loop: "{{ credentials_names }}"
- name: "Create service credentials dirs"
file:
path: "/etc/credentials/x509/{{ item }}"
state: directory
owner: root
group: root
mode: 0755
loop: "{{ credentials_names }}"
- name: Install SSO public key
- name: Copy CA
copy:
src: "{{ credentials_dir }}/sso/public.key"
dest: /etc/sso/public.key
src: "{{ credentials_dir }}/x509/ca.pem"
dest: "/etc/credentials/x509/{{ item }}/ca.pem"
owner: root
group: root
mode: 0644
loop: "{{ credentials_names }}"
# Distribute X509 credentials to all hosts as needed. This role works
# in combination with the 'x509' action plugin.
# Create and sign all certificates in a series of loops (with some
# unfortunately complex change-detection logic).
- name: Set up internal PKI credentials
block:
# X509 credentials are stored in /etc/credentials/x509 under
# directories named after the services. Every service directory
# contains a copy of the public CA certificate, so it can be
# bind-mounted in a container easily.
- file:
path: "/etc/credentials/x509/{{ item.credentials.name }}/{{ item.mode }}"
state: directory
loop: "{{ credentials }}"
# Private keys have mode 640, are owned by root and by a dedicated
# group named <service>-credentials. When the service is actually
# installed, later, maybe by an Ansible role, it can add the service
# user to this group.
- name: "Check the internal PKI certificates"
x509_csr:
credentials_name: "{{ item.credentials.name }}"
domain: "{{ domain }}"
mode: "{{ item.mode }}"
params: "{{ item.x509_params|default({}) }}"
private_key_path: "/etc/credentials/x509/{{ item.credentials.name }}/{{ item.mode }}/private_key.pem"
cert_path: "/etc/credentials/x509/{{ item.credentials.name }}/{{ item.mode }}/cert.pem"
ca_cert_path: "/etc/credentials/x509/{{ item.credentials.name }}/ca.pem"
check: true
loop: "{{ credentials }}"
check_mode: no
register: x509_should_update
- name: Install x509ca package
apt:
name: x509ca
state: present
# TODO: set the right permissions (credentials.name-credentials)
- name: "Create internal PKI CSRs"
x509_csr:
credentials_name: "{{ item.0.credentials.name }}"
domain: "{{ domain }}"
mode: "{{ item.0.mode }}"
params: "{{ item.0.x509_params|default({}) }}"
private_key_path: "/etc/credentials/x509/{{ item.0.credentials.name }}/{{ item.0.mode }}/private_key.pem"
check: false
when: "item.1.changed"
loop: "{{ credentials | zip(x509_should_update.results) | list }}"
register: x509_csr
- import_tasks: install_credentials.yml
- import_tasks: install_certs.yml
- name: "Sign internal PKI certificates"
x509_sign:
csr: "{{ item.1.csr }}"
mode: "{{ item.0.mode }}"
ca_cert_path: "{{ credentials_dir }}/x509/ca.pem"
ca_key_path: "{{ credentials_dir }}/x509/ca_private_key.pem"
when: "item.1.changed"
loop: "{{ credentials | zip(x509_csr.results) | list }}"
register: x509_sign
# Remove credentials that shouldn't be here.
# - file: path="/etc/credentials/x509/{{ item.1.name }}" state=absent
# with_subelements:
# - "{{ services }}"
# - service_credentials
# - { skip_missing: true }
- name: "Install the signed internal PKI certificates"
copy:
dest: "/etc/credentials/x509/{{ item.0.credentials.name }}/{{ item.0.mode }}/cert.pem"
content: "{{ item.1.cert }}"
mode: 0644
when: "item.1.changed"
loop: "{{ credentials | zip(x509_sign.results) | list }}"
# Create a group for public credentials.
- name: Create public-credentials group
group:
name: public-credentials
system: yes
- name: "Set permissions on the private keys"
file:
path: "/etc/credentials/x509/{{ item.credentials.name }}/{{ item.mode }}/private_key.pem"
group: "{{ item.credentials.name }}-credentials"
mode: 0640
loop: "{{ credentials }}"
# This should use the systemd module but it doesn't take lists of services.
- name: "Restart associated services"
shell: "systemctl restart {{ services[item.0.service].systemd_services | join(' ') }}"
when: "item.1.changed and item.0.service != 'LOCAL'"
loop: "{{ credentials | zip(x509_sign.results) | list }}"
rescue:
- debug: