Commit 879f885b authored by ale's avatar ale
Browse files

Initial commit

parents
# Files produced by the autotools (aclocal / autoheader / automake / autoconf
# / configure); they are regenerated from configure.ac and Makefile.am.
Makefile
Makefile.in
autom4te.cache
aclocal.m4
depcomp
config.guess
config.sub
config.log
config.status
config.h
install-sh
libtool
ltmain.sh
compile
missing
test-driver
configure
# NOTE(review): gtest is not referenced by the visible build files - confirm
# whether this entry is still needed.
gtest
stamp-*
# Object files and libtool build outputs.
*.o
*.lo
*.la
# Editor backup files, logs and test result files.
*~
*.log
*.trs
# Hidden build directories (libtool objects and dependency tracking).
.libs
.deps
# Extra flags handed to aclocal: search the m4/ directory for macros.
ACLOCAL_AMFLAGS = -I m4
# Installation directory for the program below. MODULEDIR is substituted by
# configure (see --with-apache2-lib-dir in configure.ac).
moduledir = $(MODULEDIR)
# The single program built by this package, installed into $(moduledir).
module_PROGRAMS = suexec-sandbox
# Source and header files that make up suexec-sandbox.
suexec_sandbox_SOURCES = \
suexec.c suexec.h \
sandbox.c sandbox.h \
log.h strlist.h config.h
#! /bin/sh
# Regenerate the autotools build system.
#
# When an executable autoreconf is found it replaces this shell and does all
# of the work in one step.
autoreconf_path=$(which autoreconf 2>/dev/null)
if test -x "$autoreconf_path"; then
    exec autoreconf -ivf
fi

# Otherwise run the individual tools in order, stopping at the first failure.
aclocal -I m4 &&
    autoheader &&
    automake --add-missing --foreign &&
    autoconf
/* config.h.in. Generated from configure.ac by autoheader. */
/* Define to 1 if you have the `cap' library (-lcap). */
#undef HAVE_LIBCAP
/* Define to 1 if you have the `seccomp' library (-lseccomp). */
#undef HAVE_LIBSECCOMP
/* Name of package */
#undef PACKAGE
/* Define to the address where bug reports for this package should be sent. */
#undef PACKAGE_BUGREPORT
/* Define to the full name of this package. */
#undef PACKAGE_NAME
/* Define to the full name and version of this package. */
#undef PACKAGE_STRING
/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME
/* Define to the home page for this package. */
#undef PACKAGE_URL
/* Define to the version of this package. */
#undef PACKAGE_VERSION
/* Version number of package */
#undef VERSION
dnl Autoconf input for suexec-sandbox.
AC_PREREQ([2.69])
AC_INIT([suexec-sandbox], [0.1], [info@autistici.org])
AM_INIT_AUTOMAKE([foreign -Wall -Werror])
AC_CONFIG_SRCDIR([suexec.c])
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_HEADERS([config.h])
AC_LANG([C])
dnl Strict compiler settings applied on top of any user-supplied CFLAGS.
CFLAGS="$CFLAGS -Wall -Wextra -Werror -Wpedantic -std=c11"
AC_PROG_CC
dnl Both libraries are mandatory. The empty action-if-found keeps the default
dnl behaviour: add the library to LIBS and define HAVE_LIBxxx in config.h.
AC_CHECK_LIB([cap], [cap_get_proc], [], [AC_MSG_ERROR([libcap is missing])])
AC_CHECK_LIB([seccomp], [seccomp_init], [], [AC_MSG_ERROR([libseccomp is missing])])
dnl Installation directory of the binary, overridable with an absolute path.
AC_SUBST([MODULEDIR], ["/usr/lib/apache2"])
AC_ARG_WITH(apache2-lib-dir,
AS_HELP_STRING([--with-apache2-lib-dir=DIR],
[Where to install the suexec binary [[/usr/lib/apache2]]]),
[case "${withval}" in
/*) MODULEDIR="${withval}";;
*) AC_MSG_ERROR([Bad value for --with-apache2-lib-dir]);;
esac])
AC_MSG_NOTICE([Suexec installation path $MODULEDIR])
AC_CONFIG_FILES([Makefile])
AC_OUTPUT
#ifndef __suexec_log_H
#define __suexec_log_H 1

#include <errno.h>
#include <stdio.h>
#include <string.h>

/* Text prepended to every message emitted by the macros below. */
#define LOG_PREFIX "suexec: "

/*
 * Logging helpers. All of them write one line to stderr.
 *
 * `fmt` must be a string literal: it is concatenated with the prefix and
 * the line terminator at compile time.
 *
 *   log_println(fmt)             - fixed message.
 *   log_println_errno(fmt)       - fixed message followed by ": " and
 *                                  strerror(errno).
 *   log_printf(fmt, ...)         - printf-style message.
 *   log_printf_errno(fmt, ...)   - printf-style message followed by ": " and
 *                                  strerror(errno).
 *
 * The variadic forms rely on the GNU `##__VA_ARGS__` extension so that they
 * can also be used without extra arguments.
 */
#define log_println(fmt) fprintf(stderr, LOG_PREFIX fmt "\n")
#define log_println_errno(fmt) \
  fprintf(stderr, LOG_PREFIX fmt ": %s\n", strerror(errno))
#define log_printf(fmt, ...) \
  fprintf(stderr, LOG_PREFIX fmt "\n", ##__VA_ARGS__)
/* Uses the same ": " separator as log_println_errno. */
#define log_printf_errno(fmt, ...) \
  fprintf(stderr, LOG_PREFIX fmt ": %s\n", ##__VA_ARGS__, strerror(errno))

#endif
#define _GNU_SOURCE
#include "config.h"
#include "log.h"
#include "sandbox.h"
#include <errno.h>
#include <fcntl.h>
#include <grp.h>
#include <linux/capability.h>
#include <linux/limits.h>
#include <pwd.h>
#include <sched.h>
#include <seccomp.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/capability.h>
#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/utsname.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
/*
 * Write a name of the form "suexec-<uid>-<pid>-<seconds in hex>" into buf.
 *
 * uid: user id embedded in the name.
 * buf: destination buffer; always NUL-terminated when len > 0.
 * len: size of buf in bytes; longer names are truncated by snprintf.
 *
 * The seconds value comes from CLOCK_MONOTONIC. The result of
 * clock_gettime() is not checked; on failure the zero-initialised timestamp
 * is used.
 */
static void choose_hostname(uid_t uid, char *buf, size_t len) {
  struct timespec now = {0};
  clock_gettime(CLOCK_MONOTONIC, &now);
  /* Cast every argument so that it matches its conversion specifier exactly;
   * uid_t, pid_t and time_t have no dedicated printf specifiers. */
  snprintf(buf, len, "suexec-%lu-%ld-%05lx", (unsigned long)uid,
           (long)getpid(), (unsigned long)now.tv_sec);
}
/*
 * Fill in a sandbox configuration.
 *
 * config: structure to initialise.
 * uid, gid: identity stored in the configuration.
 * argc, argv0, argv: command stored in the configuration.
 *
 * The descriptor is set to -1, no new root directory is selected, and a
 * hostname is generated from uid via choose_hostname(). Always returns 0.
 */
int sandbox_config_init(struct sandbox_config *config, uid_t uid, gid_t gid,
                        int argc, char *argv0, char **argv) {
  /* Nothing is open and no root directory is chosen yet. */
  config->new_root_dir = NULL;
  config->fd = -1;

  /* Command to execute. */
  config->argv = argv;
  config->argv0 = argv0;
  config->argc = argc;

  /* Target identity. */
  config->gid = gid;
  config->uid = uid;

  choose_hostname(uid, config->hostname, HOSTNAME_SIZE);
  return 0;
}
/*
 * Remove a fixed list of capabilities from the calling process: first from
 * the bounding set (one prctl(PR_CAPBSET_DROP) per capability), then from
 * the inheritable set (cap_set_flag + cap_set_proc).
 *
 * Progress is written to stderr. Returns 0 on success, 1 on failure.
 */
static int capabilities() {
  fprintf(stderr, "=> dropping capabilities...");

  int dropped[] = {
      CAP_AUDIT_CONTROL,   CAP_AUDIT_READ,   CAP_AUDIT_WRITE, CAP_BLOCK_SUSPEND,
      CAP_DAC_READ_SEARCH, CAP_FSETID,       CAP_IPC_LOCK,    CAP_MAC_ADMIN,
      CAP_MAC_OVERRIDE,    CAP_MKNOD,        CAP_SETFCAP,     CAP_SYSLOG,
      CAP_SYS_ADMIN,       CAP_SYS_BOOT,     CAP_SYS_MODULE,  CAP_SYS_NICE,
      CAP_SYS_RAWIO,       CAP_SYS_RESOURCE, CAP_SYS_TIME,    CAP_WAKE_ALARM};
  size_t count = sizeof(dropped) / sizeof(dropped[0]);

  /* Step 1: bounding set. */
  fprintf(stderr, "bounding...");
  size_t idx = 0;
  while (idx < count) {
    if (prctl(PR_CAPBSET_DROP, dropped[idx], 0, 0, 0) != 0) {
      log_println_errno("prctl failed");
      return 1;
    }
    idx++;
  }

  /* Step 2: inheritable set. Any failure here is reported the same way. */
  fprintf(stderr, "inheritable...");
  cap_t state = cap_get_proc();
  int failed = 1;
  if (state != NULL) {
    failed = cap_set_flag(state, CAP_INHERITABLE, count, dropped, CAP_CLEAR) ||
             cap_set_proc(state);
  }
  if (failed) {
    log_println_errno("cap_set_proc failed");
    if (state != NULL) {
      cap_free(state);
    }
    return 1;
  }

  cap_free(state);
  fprintf(stderr, "done.\n");
  return 0;
}
/*
 * Invoke the pivot_root system call through syscall(), passing new_root and
 * put_old unchanged. Returns whatever syscall() returns, narrowed to int.
 */
static int pivot_root(const char *new_root, const char *put_old) {
  int rc = syscall(SYS_pivot_root, new_root, put_old);
  return rc;
}
/*
 * Switch the root filesystem of the calling process to config->new_root_dir.
 *
 * Does nothing and returns 0 when no new root directory is configured.
 * Otherwise:
 *   1. remounts "/" recursively as MS_PRIVATE;
 *   2. bind-mounts the new root on a fresh /tmp/tmp.XXXXXX directory;
 *   3. creates an "oldroot.XXXXXX" directory inside it and pivots root,
 *      placing the previous root there;
 *   4. changes to "/", detaches the previous root and removes its directory.
 *
 * Progress is written to stderr. Returns 0 on success, -1 on failure. On
 * failures before the pivot the temporary directories and the bind mount
 * are removed again (best effort, after the error has been logged).
 *
 * NOTE(review): after a successful pivot the outer /tmp/tmp.XXXXXX directory
 * of the previous root does not appear to be removed by this function -
 * confirm whether something else cleans it up.
 */
static int mounts(struct sandbox_config *config) {
  if (!config->new_root_dir)
    return 0;

  fprintf(stderr, "=> remounting everything with MS_PRIVATE...");
  if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
    log_println_errno("mount failed");
    return -1;
  }
  fprintf(stderr, "remounted.\n");

  fprintf(stderr, "=> making a temp directory and a bind mount there...");
  char mount_dir[] = "/tmp/tmp.XXXXXX";
  if (!mkdtemp(mount_dir)) {
    log_println_errno("failed making outer temp directory");
    return -1;
  }
  if (mount(config->new_root_dir, mount_dir, NULL, MS_BIND | MS_PRIVATE,
            NULL)) {
    log_println_errno("bind mount failed");
    rmdir(mount_dir);
    return -1;
  }

  /* Same prefix as mount_dir, so overwrite the placeholder with the real
   * name before asking mkdtemp() to fill in the second template. */
  char inner_mount_dir[] = "/tmp/tmp.XXXXXX/oldroot.XXXXXX";
  memcpy(inner_mount_dir, mount_dir, sizeof(mount_dir) - 1);
  if (!mkdtemp(inner_mount_dir)) {
    log_println_errno("failed making inner temp directory");
    umount2(mount_dir, MNT_DETACH);
    rmdir(mount_dir);
    return -1;
  }
  fprintf(stderr, "done.\n");

  fprintf(stderr, "=> pivoting root...");
  if (pivot_root(mount_dir, inner_mount_dir)) {
    log_println_errno("pivot_root failed");
    rmdir(inner_mount_dir);
    umount2(mount_dir, MNT_DETACH);
    rmdir(mount_dir);
    return -1;
  }
  fprintf(stderr, "done.\n");

  /* mount_dir is now "/", so the previous root lives at the part of
   * inner_mount_dir that follows the mount_dir prefix: "/oldroot.XXXXXX". */
  const char *old_root = inner_mount_dir + sizeof(mount_dir) - 1;

  fprintf(stderr, "=> unmounting %s...", old_root);
  if (chdir("/")) {
    log_println_errno("chdir / failed");
    return -1;
  }
  if (umount2(old_root, MNT_DETACH)) {
    log_println_errno("umount failed");
    return -1;
  }
  if (rmdir(old_root)) {
    log_println_errno("rmdir failed");
    return -1;
  }
  fprintf(stderr, "done.\n");
  return 0;
}
/* Action applied to every denied system call: fail it with EPERM. */
#define SCMP_FAIL SCMP_ACT_ERRNO(EPERM)

/*
 * Build and load a seccomp filter that allows everything except:
 *   - chmod / fchmod / fchmodat calls whose mode has S_ISUID or S_ISGID set;
 *   - unshare / clone calls whose flags include CLONE_NEWUSER;
 *   - ioctl calls whose request is TIOCSTI;
 *   - keyctl, add_key, request_key, ptrace, mbind, migrate_pages,
 *     move_pages, set_mempolicy, userfaultfd and perf_event_open entirely.
 *
 * The SCMP_FLTATR_CTL_NNP attribute is set to 0 before loading.
 *
 * Progress is written to stderr. Returns 0 on success, 1 on failure.
 */
static int syscalls() {
  scmp_filter_ctx ctx = NULL;
  fprintf(stderr, "=> filtering syscalls...");
  if (!(ctx = seccomp_init(SCMP_ACT_ALLOW)) ||
      /* Setting set-user-ID / set-group-ID bits. */
      seccomp_rule_add(ctx, SCMP_FAIL, SCMP_SYS(chmod), 1,
                       SCMP_A1(SCMP_CMP_MASKED_EQ, S_ISUID, S_ISUID)) ||
      seccomp_rule_add(ctx, SCMP_FAIL, SCMP_SYS(chmod), 1,
                       SCMP_A1(SCMP_CMP_MASKED_EQ, S_ISGID, S_ISGID)) ||
      seccomp_rule_add(ctx, SCMP_FAIL, SCMP_SYS(fchmod), 1,
                       SCMP_A1(SCMP_CMP_MASKED_EQ, S_ISUID, S_ISUID)) ||
      seccomp_rule_add(ctx, SCMP_FAIL, SCMP_SYS(fchmod), 1,
                       SCMP_A1(SCMP_CMP_MASKED_EQ, S_ISGID, S_ISGID)) ||
      seccomp_rule_add(ctx, SCMP_FAIL, SCMP_SYS(fchmodat), 1,
                       SCMP_A2(SCMP_CMP_MASKED_EQ, S_ISUID, S_ISUID)) ||
      seccomp_rule_add(ctx, SCMP_FAIL, SCMP_SYS(fchmodat), 1,
                       SCMP_A2(SCMP_CMP_MASKED_EQ, S_ISGID, S_ISGID)) ||
      /* Creating user namespaces. */
      seccomp_rule_add(
          ctx, SCMP_FAIL, SCMP_SYS(unshare), 1,
          SCMP_A0(SCMP_CMP_MASKED_EQ, CLONE_NEWUSER, CLONE_NEWUSER)) ||
      seccomp_rule_add(
          ctx, SCMP_FAIL, SCMP_SYS(clone), 1,
          SCMP_A0(SCMP_CMP_MASKED_EQ, CLONE_NEWUSER, CLONE_NEWUSER)) ||
      /* TIOCSTI is a request number, not a bit flag: compare the low 32
       * bits for equality. Using TIOCSTI itself as the mask would also
       * deny every request that merely contains its bits (TIOCGWINSZ,
       * 0x5413, for example). */
      seccomp_rule_add(ctx, SCMP_FAIL, SCMP_SYS(ioctl), 1,
                       SCMP_A1(SCMP_CMP_MASKED_EQ, 0xFFFFFFFFu, TIOCSTI)) ||
      /* System calls denied regardless of their arguments. */
      seccomp_rule_add(ctx, SCMP_FAIL, SCMP_SYS(keyctl), 0) ||
      seccomp_rule_add(ctx, SCMP_FAIL, SCMP_SYS(add_key), 0) ||
      seccomp_rule_add(ctx, SCMP_FAIL, SCMP_SYS(request_key), 0) ||
      seccomp_rule_add(ctx, SCMP_FAIL, SCMP_SYS(ptrace), 0) ||
      seccomp_rule_add(ctx, SCMP_FAIL, SCMP_SYS(mbind), 0) ||
      seccomp_rule_add(ctx, SCMP_FAIL, SCMP_SYS(migrate_pages), 0) ||
      seccomp_rule_add(ctx, SCMP_FAIL, SCMP_SYS(move_pages), 0) ||
      seccomp_rule_add(ctx, SCMP_FAIL, SCMP_SYS(set_mempolicy), 0) ||
      seccomp_rule_add(ctx, SCMP_FAIL, SCMP_SYS(userfaultfd), 0) ||
      seccomp_rule_add(ctx, SCMP_FAIL, SCMP_SYS(perf_event_open), 0) ||
      seccomp_attr_set(ctx, SCMP_FLTATR_CTL_NNP, 0) || seccomp_load(ctx)) {
    /* Log first so that releasing the context cannot disturb errno. */
    log_println_errno("seccomp failed");
    if (ctx)
      seccomp_release(ctx);
    return 1;
  }
  seccomp_release(ctx);
  fprintf(stderr, "done.\n");
  return 0;
}
/* Values written to the cgroup control files, as strings. */
#define MEMORY "1073741824" /* memory.limit_in_bytes, memory.kmem.limit_in_bytes */
#define SHARES "256"        /* cpu.shares */
#define PIDS "64"           /* pids.max */
#define WEIGHT "10"         /* blkio.weight */
/* Limit passed to setrlimit(RLIMIT_NOFILE). */
#define FD_COUNT 64

/*
 * One cgroup controller and the settings to write into it.
 *
 * control:  controller name, used as a path component under /sys/fs/cgroup.
 * settings: NULL-terminated array of pointers to name/value pairs; each
 *           value is written to the file called `name`.
 */
struct cgrp_control {
  char control[256];
  struct cgrp_setting {
    char name[256];
    char value[256];
  } **settings;
};

/* Shared final setting of every controller: writes "0" to "tasks". */
struct cgrp_setting add_to_tasks = {.name = "tasks", .value = "0"};

/* NULL-terminated table of all controllers that get configured. */
struct cgrp_control *cgrps[] = {
    &(struct cgrp_control){
        .control = "memory",
        .settings =
            (struct cgrp_setting *[]){
                &(struct cgrp_setting){.name = "memory.limit_in_bytes",
                                       .value = MEMORY},
                &(struct cgrp_setting){.name = "memory.kmem.limit_in_bytes",
                                       .value = MEMORY},
                &add_to_tasks, NULL}},
    &(struct cgrp_control){
        .control = "cpu",
        .settings =
            (struct cgrp_setting *[]){
                &(struct cgrp_setting){.name = "cpu.shares", .value = SHARES},
                &add_to_tasks, NULL}},
    &(struct cgrp_control){
        .control = "pids",
        .settings =
            (struct cgrp_setting *[]){
                &(struct cgrp_setting){.name = "pids.max", .value = PIDS},
                &add_to_tasks, NULL}},
    &(struct cgrp_control){
        .control = "blkio",
        .settings =
            (struct cgrp_setting *[]){
                /* Uses WEIGHT; the pids limit does not belong here. */
                &(struct cgrp_setting){.name = "blkio.weight", .value = WEIGHT},
                &add_to_tasks, NULL}},
    NULL};
/*
 * Apply the resource limits of the sandbox.
 *
 * For every controller in cgrps a directory named after config->hostname is
 * created under /sys/fs/cgroup/<controller>/ and each configured value is
 * written to its control file. Afterwards RLIMIT_NOFILE is set to FD_COUNT
 * (soft and hard).
 *
 * Progress is written to stderr. Returns 0 on success and a non-zero value
 * on failure (-1 for cgroup errors, 1 if setrlimit fails). Directories that
 * were already created are left in place on failure.
 */
static int resources(struct sandbox_config *config) {
  fprintf(stderr, "=> setting cgroups...");
  for (struct cgrp_control **cgrp = cgrps; *cgrp; cgrp++) {
    char dir[PATH_MAX] = {0};
    fprintf(stderr, "%s...", (*cgrp)->control);
    int dir_len = snprintf(dir, sizeof(dir), "/sys/fs/cgroup/%s/%s",
                           (*cgrp)->control, config->hostname);
    /* snprintf reports truncation through a return value >= the buffer
     * size, not through -1. */
    if (dir_len < 0 || (size_t)dir_len >= sizeof(dir)) {
      log_println("cgroup directory path too long");
      return -1;
    }
    if (mkdir(dir, S_IRUSR | S_IWUSR | S_IXUSR)) {
      log_printf_errno("mkdir %s failed", dir);
      return -1;
    }
    for (struct cgrp_setting **setting = (*cgrp)->settings; *setting;
         setting++) {
      char path[PATH_MAX] = {0};
      int path_len =
          snprintf(path, sizeof(path), "%s/%s", dir, (*setting)->name);
      if (path_len < 0 || (size_t)path_len >= sizeof(path)) {
        log_println("cgroup setting path too long");
        return -1;
      }
      int fd = open(path, O_WRONLY);
      if (fd == -1) {
        log_printf_errno("opening %s failed", path);
        return -1;
      }
      if (write(fd, (*setting)->value, strlen((*setting)->value)) == -1) {
        log_printf_errno("writing to %s failed", path);
        close(fd);
        return -1;
      }
      close(fd);
    }
  }
  fprintf(stderr, "done.\n");

  fprintf(stderr, "=> setting rlimit...");
  if (setrlimit(RLIMIT_NOFILE, &(struct rlimit){
                                   .rlim_max = FD_COUNT, .rlim_cur = FD_COUNT,
                               })) {
    log_println_errno("setrlimit failed");
    return 1;
  }
  fprintf(stderr, "done.\n");
  return 0;
}
/*
 * Undo the cgroup part of resources().
 *
 * For every controller in cgrps, "0" is written to the controller's
 * top-level "tasks" file and the directory named after config->hostname is
 * removed.
 *
 * Cleanup is best effort: a failure for one controller is logged and the
 * remaining controllers are still processed.
 *
 * Returns 0 if everything succeeded, -1 if any step failed.
 */
static int free_resources(struct sandbox_config *config) {
  int result = 0;
  fprintf(stderr, "=> cleaning cgroups...");
  for (struct cgrp_control **cgrp = cgrps; *cgrp; cgrp++) {
    char dir[PATH_MAX] = {0};
    char task[PATH_MAX] = {0};
    int dir_len = snprintf(dir, sizeof(dir), "/sys/fs/cgroup/%s/%s",
                           (*cgrp)->control, config->hostname);
    int task_len = snprintf(task, sizeof(task), "/sys/fs/cgroup/%s/tasks",
                            (*cgrp)->control);
    if (dir_len < 0 || (size_t)dir_len >= sizeof(dir) || task_len < 0 ||
        (size_t)task_len >= sizeof(task)) {
      log_println("cgroup path too long");
      result = -1;
      continue;
    }

    int task_fd = open(task, O_WRONLY);
    if (task_fd == -1) {
      log_printf_errno("opening %s failed", task);
      result = -1;
    } else {
      /* Write the digit only, without the terminating NUL. */
      if (write(task_fd, "0", 1) == -1) {
        log_printf_errno("writing to %s failed", task);
        result = -1;
      }
      close(task_fd);
    }

    /* Attempted even if the step above failed, so that a directory that
     * can be removed is not left behind. */
    if (rmdir(dir)) {
      log_printf_errno("rmdir %s failed", dir);
      result = -1;
    }
  }
  if (result == 0)
    fprintf(stderr, "done.\n");
  return result;
}
/* Id mapping written for the child: ids 0..USERNS_COUNT-1 inside map to
 * USERNS_OFFSET..USERNS_OFFSET+USERNS_COUNT-1 outside. */
#define USERNS_OFFSET 10000
#define USERNS_COUNT 2000

/*
 * Parent side of the user-namespace handshake.
 *
 * child_pid: process whose /proc/<pid>/uid_map and gid_map are written.
 * fd:        descriptor connected to the child.
 *
 * Reads one int from fd. If it is non-zero, the line
 * "0 USERNS_OFFSET USERNS_COUNT" is written to the child's uid_map and
 * gid_map. Finally an int 0 is written back to fd.
 *
 * Returns 0 on success, -1 on failure (in which case nothing is written
 * back to fd).
 */
static int handle_child_uid_map(pid_t child_pid, int fd) {
  int has_userns = -1;
  if (read(fd, &has_userns, sizeof(has_userns)) != sizeof(has_userns)) {
    fprintf(stderr, "couldn't read from child!\n");
    return -1;
  }
  if (has_userns) {
    char path[PATH_MAX] = {0};
    for (char **file = (char *[]){"uid_map", "gid_map", 0}; *file; file++) {
      int path_len = snprintf(path, sizeof(path), "/proc/%d/%s",
                              (int)child_pid, *file);
      /* Truncated output is reported as a value >= the buffer size. */
      if (path_len < 0 || (size_t)path_len >= sizeof(path)) {
        log_println("snprintf failed");
        return -1;
      }
      fprintf(stderr, "writing %s...", path);
      int map_fd = open(path, O_WRONLY);
      if (map_fd == -1) {
        log_printf_errno("open %s failed", path);
        return -1;
      }
      if (dprintf(map_fd, "0 %d %d\n", USERNS_OFFSET, USERNS_COUNT) == -1) {
        log_println_errno("dprintf failed");
        close(map_fd);
        return -1;
      }
      close(map_fd);
    }
  }
  /* Tell the child that it may continue. */
  if (write(fd, &(int){0}, sizeof(int)) != sizeof(int)) {
    log_println_errno("couldn't write");
    return -1;
  }
  return 0;
}
/*
 * Child side of the user-namespace handshake, followed by the switch to the
 * configured identity.
 *
 * Tries unshare(CLONE_NEWUSER) and sends the outcome (1 = succeeded,
 * 0 = failed) to the parent over config->fd, then waits for an int reply;
 * a non-zero reply is treated as failure. Afterwards the supplementary
 * group list is reduced to config->gid and the real, effective and saved
 * group and user ids are set to config->gid and config->uid.
 *
 * Progress is written to stderr. Returns 0 on success, -1 on failure.
 */
static int userns(struct sandbox_config *config) {
  fprintf(stderr, "=> trying a user namespace...");
  int has_userns = !unshare(CLONE_NEWUSER);
  if (write(config->fd, &has_userns, sizeof(has_userns)) !=
      sizeof(has_userns)) {
    log_println_errno("couldn't write");
    return -1;
  }
  int result = 0;
  if (read(config->fd, &result, sizeof(result)) != sizeof(result)) {
    log_println_errno("couldn't read");
    return -1;
  }
  if (result)
    return -1;
  if (has_userns) {
    fprintf(stderr, "done.\n");
  } else {
    fprintf(stderr, "unsupported? continuing.\n");
  }

  fprintf(stderr, "=> switching to uid %u / gid %u...",
          (unsigned)config->uid, (unsigned)config->gid);
  /* Groups are changed before the user id: once the uid has been dropped
   * the process may no longer be allowed to change its groups. */
  if (setgroups(1, &(gid_t){config->gid}) ||
      setresgid(config->gid, config->gid, config->gid) ||
      setresuid(config->uid, config->uid, config->uid)) {
    log_println_errno("failed setting uid/gid");
    return -1;
  }
  fprintf(stderr, "done.\n");
  return 0;
}
/*
 * Entry point of the cloned process; arg is the struct sandbox_config.
 *
 * Runs the setup steps in order - hostname, mounts, user namespace and
 * identity, capabilities, syscall filter - stopping at the first one that
 * fails. The descriptor in the configuration is closed in either case.
 * On success the configured command is executed with a NULL environment.
 *
 * Returns -1 on any failure. The final return 0 follows a zero return from
 * execve().
 */
static int child(void *arg) {
  struct sandbox_config *cfg = arg;

  int setup_failed =
      sethostname(cfg->hostname, strlen(cfg->hostname)) || mounts(cfg) ||
      userns(cfg) || capabilities() || syscalls();
  if (setup_failed) {
    close(cfg->fd);
    return -1;
  }

  if (close(cfg->fd) != 0) {
    log_println_errno("close failed");
    return -1;
  }

  if (execve(cfg->argv0, cfg->argv, NULL) != 0) {
    log_printf_errno("execve(%s) failed!", cfg->argv0);
    return -1;
  }
  return 0;
}
/* Size of the stack handed to the cloned child. */
#define STACK_SIZE (1024 * 1024)

/*
 * Run the configured command inside a sandbox and wait for it to finish.
 *
 * Steps:
 *   1. check that the kernel is at least 4.7 and the machine is x86_64;
 *   2. create a socket pair used to talk to the child (config->fd is the
 *      child's end);
 *   3. apply resource limits via resources();
 *   4. clone() the child into new mount, cgroup, pid, ipc, network and uts
 *      namespaces;
 *   5. perform the uid/gid map handshake, killing the child if it fails;
 *   6. wait for the child, then undo the cgroup setup and release
 *      everything acquired here.
 *
 * Progress is written to stderr.
 *
 * Returns 0 on success. Returns -1 on any failure in this function or if
 * the child did not exit normally; otherwise the child's exit status is
 * OR-ed into the result.
 */
int sandbox_start(struct sandbox_config *config) {
  int err = -1;
  /* -1 marks "not open"; 0 is a valid descriptor and must not be used. */
  int sockets[2] = {-1, -1};
  pid_t child_pid = -1;
  char *stack = NULL;
  int child_status = 0;

  fprintf(stderr, "=> validating Linux version...");
  struct utsname host = {0};
  if (uname(&host)) {
    log_println_errno("uname failed");
    return -1;
  }
  int major = -1;
  int minor = -1;
  if (sscanf(host.release, "%d.%d.", &major, &minor) != 2) {
    log_printf("weird release format: %s", host.release);
    return -1;
  }
  /* Lower bound only: newer major versions satisfy "at least 4.7" too. */
  if (major < 4 || (major == 4 && minor < 7)) {
    log_printf("expected kernel >= 4.7: %s", host.release);
    return -1;
  }
  if (strcmp("x86_64", host.machine)) {
    log_printf("expected x86_64: %s", host.machine);
    return -1;
  }
  fprintf(stderr, "%s on %s.\n", host.release, host.machine);

  if (socketpair(AF_LOCAL, SOCK_SEQPACKET, 0, sockets)) {
    log_println_errno("socketpair failed");
    return -1;
  }
  /* The parent's end must not leak into the executed command. */
  if (fcntl(sockets[0], F_SETFD, FD_CLOEXEC)) {
    log_println_errno("fcntl failed");
    goto close_sockets;
  }
  config->fd = sockets[1];

  stack = malloc(STACK_SIZE);
  if (!stack) {
    log_println("out of memory");
    goto close_sockets;
  }

  if (resources(config)) {
    goto clear_resources;
  }

  int flags = CLONE_NEWNS | CLONE_NEWCGROUP | CLONE_NEWPID | CLONE_NEWIPC |
              CLONE_NEWNET | CLONE_NEWUTS;
  /* clone() expects the top of the stack area, hence stack + STACK_SIZE. */
  child_pid = clone(child, stack + STACK_SIZE, flags | SIGCHLD, config);
  if (child_pid == -1) {
    log_println_errno("clone failed");
    goto clear_resources;
  }

  /* The child owns its own copy of this end now; close ours exactly once. */
  close(sockets[1]);
  sockets[1] = -1;
  config->fd = -1;

  err = 0;
  if (handle_child_uid_map(child_pid, sockets[0])) {
    err = -1;
    kill(child_pid, SIGKILL);
  }

  if (waitpid(child_pid, &child_status, 0) == -1) {
    log_println_errno("waitpid failed");
    err = -1;
  } else if (WIFEXITED(child_status)) {
    err |= WEXITSTATUS(child_status);
  } else {
    /* Terminated by a signal: there is no exit status to report. */
    err = -1;
  }

clear_resources:
  free_resources(config);
  free(stack);
close_sockets:
  if (sockets[0] != -1)
    close(sockets[0]);
  if (sockets[1] != -1)
    close(sockets[1]);
  return err;
}
#ifndef __suexec_sandbox_H
#define __suexec_sandbox_H 1
#include <sys/types.h>
#define HOSTNAME_SIZE 64
struct sandbox_config {
char hostname[HOSTNAME_SIZE];
int fd;