Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision

Target

Select target project
  • ai3/float
  • micah/float
2 results
Select Git revision
Show changes
Commits on Source (55)
Showing
with 289 additions and 148 deletions
......@@ -89,6 +89,21 @@ the public-facing DNS zones, if unset it defaults to `ip`
`groups` (optional) is a list of groups that this host should be a
member of.
`resolver_mode` (optional) controls the desired state of the host's
*resolv.conf* file. The supported values are:
* *ignore* - do nothing and leave resolv.conf alone
* *localhost* - use localhost as a resolver, presumably some other
role will have installed a DNS cache there
* *internal:NET* - use the frontend hosts as resolvers, over the
specified overlay network named NET
* *external* - use Google Public DNS.
Note that due to ordering issues it is advised to set the *resolver_mode*
attribute on hosts only after the first setup is complete, to avoid
breaking DNS resolution while Ansible is running.
## Example
An example of a valid inventory file (for a hypothetical Vagrant
......@@ -480,6 +495,12 @@ attribute.
*master_election* attribute is also true, the backup job will only be
run on the master host for the service.
`sharded`: When this attribute is true, the dataset is considered a
sharded (partitioned) dataset, so float will **not** automatically
attempt to restore it on new servers: the idea is that for sharded
datasets, the application layer is responsible for data management.
This attribute is false by default.
`owner`: For filesystem paths, the user that will own the files upon
restore.
......
......@@ -156,6 +156,7 @@ def command_run(config, playbooks,
ansible_check=False,
ansible_diff=False,
ansible_stdout=None,
ansible_limit=None,
ansible_extra_vars=[]):
if not os.path.exists(config):
raise Exception(
......@@ -185,6 +186,8 @@ def command_run(config, playbooks,
cmd.append('--check')
if ansible_diff:
cmd.append('--diff')
if ansible_limit:
cmd.extend(['--limit', ansible_limit])
for v in ansible_extra_vars:
cmd.append('--extra-vars=' + v)
cmd.append(arg)
......@@ -398,6 +401,9 @@ themselves.
'-e', '--extra-vars', dest='ansible_extra_vars',
action='append', default=[],
help='Extra variables for Ansible')
run_parser.add_argument(
'-l', '--limit', dest='ansible_limit',
help='Limit to selected hosts')
kwargs = vars(parser.parse_args())
cmd = kwargs.pop('subparser')
......
......@@ -3,7 +3,7 @@
- hosts: all
roles:
- base
- credentials
- float-credentials
- vagrant-compat
- hosts: net-overlay
......
......@@ -358,7 +358,6 @@ def _host_vars(name, inventory, services, assignments):
'float_disabled_services': [],
'float_enabled_containers': [],
'float_host_service_credentials': [],
'float_host_service_credentials_certs': [],
'float_host_overlay_networks': _host_net_overlays(name, inventory),
'float_host_dns_map': _host_service_dns_map(
name, inventory, services, assignments),
......@@ -366,8 +365,7 @@ def _host_vars(name, inventory, services, assignments):
# Add default client credentials that are present on all hosts.
for c in DEFAULT_SERVICE_CREDENTIALS:
hv['float_host_service_credentials'].append({'credentials': c})
hv['float_host_service_credentials_certs'].append({
hv['float_host_service_credentials'].append({
'credentials': c, 'service': 'LOCAL',
'mode': 'client', 'x509_params': {}})
......@@ -390,15 +388,13 @@ def _host_vars(name, inventory, services, assignments):
for u in services[s].get('systemd_services', []):
enabled_systemd_units.add(u)
for c in services[s].get('service_credentials', []):
hv['float_host_service_credentials'].append({
'service': s, 'credentials': c})
if c.get('enable_server', True):
params = _service_credential_params(name, s, inventory, assignments)
hv['float_host_service_credentials_certs'].append({
hv['float_host_service_credentials'].append({
'credentials': c, 'service': s,
'mode': 'server', 'x509_params': params})
if c.get('enable_client', True):
hv['float_host_service_credentials_certs'].append({
hv['float_host_service_credentials'].append({
'credentials': c, 'service': s,
'mode': 'client', 'x509_params': {}})
else:
......
......@@ -21,3 +21,11 @@ emergency_ssh_key: ""
# The Debian distribution that we are using as the basis.
float_debian_dist: "buster"
# How to configure resolv.conf, one of the following options:
# 'ignore' - do nothing and leave resolv.conf alone
# 'localhost' - use localhost as the name server (presumably a cache)
# 'internal:NET' - use the frontend hosts as resolvers, over the
# specified overlay network NET
# 'external' - use Google Public DNS.
resolver_mode: "ignore"
......@@ -6,9 +6,12 @@ import datetime
import decimal
import itertools
import re
import os
import shlex
import subprocess
GUARD_FILE = '/etc/smartmon.disable'
device_info_re = re.compile(r'^(?P<k>[^:]+?)(?:(?:\sis|):)\s*(?P<v>.*)$')
ata_error_count_re = re.compile(
......@@ -386,6 +389,10 @@ def collect_disks_smart_metrics():
def main():
# Guard file to stop this automation in an emergency.
if os.path.exists(GUARD_FILE):
sys.exit(0)
version_metric = Metric('smartctl_version', {
'version': smart_ctl_version()
}, True)
......
......@@ -11,7 +11,10 @@
dataset_owner: "{{ item.1.get('owner', '') }}"
dataset_path: "{{ item.1.get('path', '') }}"
dataset_type: "{% if 'backup_command' in item.1 %}pipe{% else %}file{% endif %}"
dataset_should_backup: "{{ item.0.name in float_enabled_services and ((not item.1.get('on_master_only', False)) or (item.0.get('master_host') == inventory_hostname)) }}"
dataset_should_backup: "{{ (item.0.name in float_enabled_services) and ((not item.1.get('on_master_only', False)) or (item.0.get('master_host') == inventory_hostname)) }}"
- set_fact:
dataset_should_restore: "{{ dataset_should_backup and not item.1.get('sharded', False) }}"
- name: Set up configuration for dataset {{ dataset.name }} (source)
template:
......@@ -47,20 +50,28 @@
src: "tabacco/restore-script.j2"
dest: "/usr/lib/float/datasets/restore-{{ dataset_filename }}"
mode: 0755
when: dataset_should_backup
when: dataset_should_restore
- name: Create restore service unit
template:
src: "tabacco/restore-service.j2"
dest: "/etc/systemd/system/restore-{{ dataset_filename }}.service"
mode: 0444
when: dataset_should_backup
when: dataset_should_restore
- name: Cleanup restore service unit
file:
path: "/etc/systemd/system/restore-{{ dataset_filename }}.service"
state: absent
when: "not dataset_should_restore"
notify: reload systemd
- name: Enable restore service unit
systemd:
name: "restore-{{ dataset_filename }}.service"
enabled: yes
when: dataset_should_backup
when: dataset_should_restore
notify: reload systemd
- name: Wipe dataset restore guard file
file:
......
......@@ -35,6 +35,6 @@
- name: Configure systemd
copy:
src: "system.conf"
dest: "/etc/systemd/system.dconf"
dest: "/etc/systemd/system.conf"
notify: reload systemd
......@@ -86,3 +86,10 @@
- import_tasks: ipmi.yml
when: ipmi_device.stat.exists == true
- name: Configure resolv.conf
template:
src: "resolv.conf.j2"
dest: "/etc/resolv.conf"
when: "resolver_mode != 'ignore'"
{% if resolver_mode == 'localhost' %}
nameserver 127.0.0.1
options edns0
{% elif resolver_mode.startswith('internal:') %}
{% set dns_overlay_net = resolver_mode[9:] %}
{% for h in groups['frontend'] | sort %}
nameserver {{ hostvars[h]['ip_' + dns_overlay_net] }}
{% endfor %}
options edns0 rotate
{% else %}
nameserver 8.8.8.8
nameserver 8.8.4.4
options edns0
{% endif %}
......@@ -22,6 +22,11 @@ ruleset(name="incoming") {
# Send a copy of everything to mtail.
action(type="ompipe"
Pipe="/run/mtail.fifo"
# Protect the main queue when the mtail pipe is full: discard messages
# once the ompipe action queue fills up.
queue.type="FixedArray"
queue.size="4096"
queue.timeoutEnqueue="0"
action.resumeRetryCount="-1"
action.resumeInterval="2"
action.resumeIntervalMax="30")
......
......@@ -56,12 +56,16 @@ fs.suid_dumpable=0
# Disable source routed packet acceptance.
net.ipv4.conf.all.accept_source_route=0
net.ipv4.conf.default.accept_source_route=0
net.ipv6.conf.all.accept_source_route=0
net.ipv6.conf.default.accept_source_route=0
# Do not accept ICMP redirects.
net.ipv4.conf.all.accept_redirects=0
net.ipv4.conf.default.accept_redirects=0
net.ipv4.conf.all.secure_redirects=0
net.ipv4.conf.default.secure_redirects=0
net.ipv6.conf.all.accept_redirects=0
net.ipv6.conf.default.accept_redirects=0
# Do not send ICMP redirects.
net.ipv4.conf.all.send_redirects=0
......@@ -72,3 +76,54 @@ net.ipv4.icmp_ignore_bogus_error_responses=1
# Enable RFC-recommended source route validation.
net.ipv4.conf.all.rp_filter=1
net.ipv4.conf.default.rp_filter=1
# Prevent the automatic loading of line disciplines
# https://lore.kernel.org/patchwork/patch/1034150
dev.tty.ldisc_autoload=0
# Additional protections for fifos, hardlinks, regular files, and symlinks
# https://patchwork.kernel.org/patch/10244781
# slightly tightened up from the systemd default values of "1" for each
fs.protected_fifos=2
fs.protected_hardlinks=1
fs.protected_regular=2
fs.protected_symlinks=1
# Disable the kexec system call (can be used to replace the running kernel)
# https://lwn.net/Articles/580269
# (linux-hardened default)
kernel.kexec_load_disabled=1
# Impose restrictions on exposing kernel pointers
# https://lwn.net/Articles/420403
# (linux-hardened default)
kernel.kptr_restrict=2
# Restrict use of the performance events system by unprivileged users
# https://lwn.net/Articles/696216
# (linux-hardened default)
kernel.perf_event_paranoid=3
# Disable the "magic sysrq key" functionality
# https://security.stackexchange.com/questions/138658
# https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1861238
kernel.sysrq=0
# Harden the BPF JIT compiler and restrict unprivileged use of BPF
# https://www.zerodayinitiative.com/advisories/ZDI-20-350
# https://lwn.net/Articles/660331
# (linux-hardened default)
net.core.bpf_jit_harden=2
kernel.unprivileged_bpf_disabled=1
# Disable unprivileged user namespaces
# https://lwn.net/Articles/673597
# (linux-hardened default)
#kernel.unprivileged_userns_clone=0
# Enable yama ptrace restrictions
# https://www.kernel.org/doc/Documentation/security/Yama.txt
# (linux-hardened default)
# set to "3" if the use of ptrace is not needed
kernel.yama.ptrace_scope=1
......@@ -3,7 +3,24 @@ credentials
Ansible role that installs all the [service
credentials](../docs/service_mesh.md#mutual-service-authentication) on
the hosts where they're needed.
the hosts where they're needed. This role works in combination with
the 'x509' action plugin.
Private keys never leave the target host: we create a CSR there and
sign it on the Ansible host.
X509 credentials are stored in /etc/credentials/x509 under directories
named after the services. Every service directory contains a copy of
the public CA certificate, so it can be bind-mounted in a container
easily.
Private keys have mode 640, are owned by root and by a dedicated group
named *service*-credentials. When the service is actually installed,
later, maybe by an Ansible role, it can add the service user to this
group.
Use by including this role and setting the *credentials* variable to a
list of entries specifying the desired credentials. This is already
done once system-wide by the *float-credentials* role with the
credentials automagically derived from the service definitions by
*float*.
---
#- set_fact:
# x509_params: "{{ float_service_credentials_params[service_name_item + '-' + credentials.name] | default({}) }}"
# when: "service_name_item is defined"
- name: Set up internal PKI credentials
block:
- file:
path: "/etc/credentials/x509/{{ item.credentials.name }}/{{ item.mode }}"
state: directory
loop: "{{ float_host_service_credentials_certs }}"
- name: "Check the internal PKI certificates"
x509_csr:
credentials_name: "{{ item.credentials.name }}"
domain: "{{ domain }}"
mode: "{{ item.mode }}"
params: "{{ item.x509_params|default({}) }}"
private_key_path: "/etc/credentials/x509/{{ item.credentials.name }}/{{ item.mode }}/private_key.pem"
cert_path: "/etc/credentials/x509/{{ item.credentials.name }}/{{ item.mode }}/cert.pem"
ca_cert_path: "/etc/credentials/x509/{{ item.credentials.name }}/ca.pem"
check: true
loop: "{{ float_host_service_credentials_certs }}"
check_mode: no
register: x509_should_update
# TODO: set the right permissions (credentials.name-credentials)
- name: "Create internal PKI CSRs"
x509_csr:
credentials_name: "{{ item.0.credentials.name }}"
domain: "{{ domain }}"
mode: "{{ item.0.mode }}"
params: "{{ item.0.x509_params|default({}) }}"
private_key_path: "/etc/credentials/x509/{{ item.0.credentials.name }}/{{ item.0.mode }}/private_key.pem"
check: false
when: "item.1.changed"
loop: "{{ float_host_service_credentials_certs | zip(x509_should_update.results) | list }}"
register: x509_csr
- name: "Sign internal PKI certificates"
x509_sign:
csr: "{{ item.1.csr }}"
mode: "{{ item.0.mode }}"
ca_cert_path: "{{ credentials_dir }}/x509/ca.pem"
ca_key_path: "{{ credentials_dir }}/x509/ca_private_key.pem"
when: "item.1.changed"
loop: "{{ float_host_service_credentials_certs | zip(x509_csr.results) | list }}"
register: x509_sign
- name: "Install the signed internal PKI certificates"
copy:
dest: "/etc/credentials/x509/{{ item.0.credentials.name }}/{{ item.0.mode }}/cert.pem"
content: "{{ item.1.cert }}"
mode: 0644
when: "item.1.changed"
loop: "{{ float_host_service_credentials_certs | zip(x509_sign.results) | list }}"
- name: "Set permissions on the private keys"
file:
path: "/etc/credentials/x509/{{ item.credentials.name }}/{{ item.mode }}/private_key.pem"
group: "{{ item.credentials.name }}-credentials"
mode: 0640
loop: "{{ float_host_service_credentials_certs }}"
rescue:
- debug:
msg: "Failed to set up one or more credentials"
---
- name: "Create service credentials group"
group:
name: "{{ item.credentials.name }}-credentials"
system: true
loop: "{{ float_host_service_credentials }}"
- name: "Create service credentials dirs"
file:
path: "/etc/credentials/x509/{{ item.credentials.name }}"
state: directory
loop: "{{ float_host_service_credentials }}"
- name: Copy CA
copy:
src: "{{ credentials_dir }}/x509/ca.pem"
dest: "/etc/credentials/x509/{{ item.credentials.name }}/ca.pem"
owner: root
group: root
mode: 0644
loop: "{{ float_host_service_credentials }}"
---
# Distribute the SSO public key to all hosts.
# This package needs to be on hosts in order to generate the CSRs.
- name: Install x509ca package
apt:
name: x509ca
state: present
- file:
path: /etc/sso
# Get the credential names from the list of certs.
- set_fact:
credentials_names: "{{ credentials | map(attribute='credentials') | map(attribute='name') | unique | list }}"
- name: "Create service credentials group"
group:
name: "{{ item }}-credentials"
system: true
loop: "{{ credentials_names }}"
- name: "Create service credentials dirs"
file:
path: "/etc/credentials/x509/{{ item }}"
state: directory
owner: root
group: root
mode: 0755
loop: "{{ credentials_names }}"
- name: Install SSO public key
- name: Copy CA
copy:
src: "{{ credentials_dir }}/sso/public.key"
dest: /etc/sso/public.key
src: "{{ credentials_dir }}/x509/ca.pem"
dest: "/etc/credentials/x509/{{ item }}/ca.pem"
owner: root
group: root
mode: 0644
loop: "{{ credentials_names }}"
# Distribute X509 credentials to all hosts as needed. This role works
# in combination with the 'x509' action plugin.
# Create and sign all certificates in a series of loops (with some
# unfortunately complex change-detection logic).
- name: Set up internal PKI credentials
block:
# X509 credentials are stored in /etc/credentials/x509 under
# directories named after the services. Every service directory
# contains a copy of the public CA certificate, so it can be
# bind-mounted in a container easily.
- file:
path: "/etc/credentials/x509/{{ item.credentials.name }}/{{ item.mode }}"
state: directory
loop: "{{ credentials }}"
# Private keys have mode 640, are owned by root and by a dedicated
# group named <service>-credentials. When the service is actually
# installed, later, maybe by an Ansible role, it can add the service
# user to this group.
- name: "Check the internal PKI certificates"
x509_csr:
credentials_name: "{{ item.credentials.name }}"
domain: "{{ domain }}"
mode: "{{ item.mode }}"
params: "{{ item.x509_params|default({}) }}"
private_key_path: "/etc/credentials/x509/{{ item.credentials.name }}/{{ item.mode }}/private_key.pem"
cert_path: "/etc/credentials/x509/{{ item.credentials.name }}/{{ item.mode }}/cert.pem"
ca_cert_path: "/etc/credentials/x509/{{ item.credentials.name }}/ca.pem"
check: true
loop: "{{ credentials }}"
check_mode: no
register: x509_should_update
- name: Install x509ca package
apt:
name: x509ca
state: present
# TODO: set the right permissions (credentials.name-credentials)
- name: "Create internal PKI CSRs"
x509_csr:
credentials_name: "{{ item.0.credentials.name }}"
domain: "{{ domain }}"
mode: "{{ item.0.mode }}"
params: "{{ item.0.x509_params|default({}) }}"
private_key_path: "/etc/credentials/x509/{{ item.0.credentials.name }}/{{ item.0.mode }}/private_key.pem"
check: false
when: "item.1.changed"
loop: "{{ credentials | zip(x509_should_update.results) | list }}"
register: x509_csr
- name: "Sign internal PKI certificates"
x509_sign:
csr: "{{ item.1.csr }}"
mode: "{{ item.0.mode }}"
ca_cert_path: "{{ credentials_dir }}/x509/ca.pem"
ca_key_path: "{{ credentials_dir }}/x509/ca_private_key.pem"
when: "item.1.changed"
loop: "{{ credentials | zip(x509_csr.results) | list }}"
register: x509_sign
- import_tasks: install_credentials.yml
- import_tasks: install_certs.yml
- name: "Install the signed internal PKI certificates"
copy:
dest: "/etc/credentials/x509/{{ item.0.credentials.name }}/{{ item.0.mode }}/cert.pem"
content: "{{ item.1.cert }}"
mode: 0644
when: "item.1.changed"
loop: "{{ credentials | zip(x509_sign.results) | list }}"
# Remove credentials that shouldn't be here.
# - file: path="/etc/credentials/x509/{{ item.1.name }}" state=absent
# with_subelements:
# - "{{ services }}"
# - service_credentials
# - { skip_missing: true }
- name: "Set permissions on the private keys"
file:
path: "/etc/credentials/x509/{{ item.credentials.name }}/{{ item.mode }}/private_key.pem"
group: "{{ item.credentials.name }}-credentials"
mode: 0640
loop: "{{ credentials }}"
# Create a group for public credentials.
- name: Create public-credentials group
group:
name: public-credentials
system: yes
# This should use the systemd module but it doesn't take lists of services.
- name: "Restart associated services"
shell: "systemctl restart {{ services[item.0.service].systemd_services | join(' ') }}"
when: "item.1.changed and item.0.service != 'LOCAL'"
loop: "{{ credentials | zip(x509_sign.results) | list }}"
rescue:
- debug:
msg: "Failed to set up one or more credentials"
# Create the root directory for public credentials.
- file:
path: /etc/credentials/public
state: directory
......@@ -17,7 +17,8 @@ SyslogIdentifier={{ item.service }}-{{ item.container.name }}
{% if item.container.resources is defined %}
{% if item.container.resources.ram is defined %}
MemoryHigh={{ item.container.resources.ram }}
MemoryMax={{ item.container.resources.ram }}
ExecStartPost=+/bin/sh -c "echo 0 > /sys/fs/cgroup/memory/system.slice/%n/memory.swappiness"
{% endif %}
{% if item.container.resources.cpu is defined %}
CPUQuota={{ 100 * item.container.resources.cpu }}%
......
This role includes the *credentials* role with the list of credentials
derived from the service descriptions in services.yml.
---
dependencies:
- role: credentials
vars:
credentials: "{{ float_host_service_credentials }}"
---
# Distribute the SSO public key to all hosts.
- file:
path: /etc/sso
state: directory
owner: root
group: root
mode: 0755
- name: Install SSO public key
copy:
src: "{{ credentials_dir }}/sso/public.key"
dest: /etc/sso/public.key
mode: 0644
# Create a group for public credentials.
- name: Create public-credentials group
group:
name: public-credentials
system: yes
# Create the root directory for public credentials.
- file:
path: /etc/credentials/public
state: directory