Commit 0c63b5fe authored by ale's avatar ale

Forward signals to child process

This also required a small refactoring of the timeout logic. Begin adding
the --node-exporter-dir option.
parent d4a5b4fe
Pipeline #1310 failed with stages
in 1 minute and 33 seconds
#! /bin/sh
# Regenerate the autotools build files.
#
# If an executable autoreconf is found, hand over to it: exec replaces this
# shell, so nothing below runs in that case. Otherwise run the individual
# tools in order, stopping at the first one that fails.
autoreconf_path="$(which autoreconf 2>/dev/null)"
if [ -x "$autoreconf_path" ]; then
    exec autoreconf -ivf
fi

aclocal -I m4 \
    && autoheader \
    && automake --add-missing --foreign \
    && autoconf
AC_INIT([runcron], [1.0], [ale@incal.net])
AC_PREREQ([2.69])
AC_INIT([runcron], [1.1], [ale@incal.net])
AC_CONFIG_SRCDIR([runcron.c])
AC_LANG(C)
......
......@@ -19,23 +19,24 @@
*/
#define _GNU_SOURCE
#include <errno.h>
#include <getopt.h>
#include <math.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <getopt.h>
#include <errno.h>
#include <unistd.h>
#include <signal.h>
#include <time.h>
#include <math.h>
#include <sys/file.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
#include "config.h"
/* Global options */
/* Job name; also used as the suffix of the lock file name. Presumably set by
 * -n / --name (the assignment is not visible here) -- TODO confirm. */
char *job_name = NULL;
/* Directory selected with --node-exporter-dir (overrides this default). */
char *node_exporter_dir = "/var/lib/prometheus/node-exporter";
/* Presumably the --splay value in seconds (0 = no random delay) -- the
 * assignment is not visible here, TODO confirm. */
int splay_time = 0;
/* Run timeout in seconds; run() only arms alarm() when this is > 0. */
int run_timeout = 0;
/* Flag set to 1 by the --syslog option. */
int syslog = 0;
......@@ -46,45 +47,60 @@ int lock_wait = 0;
/* Directory in which the "runcron.lock.<job_name>" lock file is created. */
char *lock_dir = "/var/tmp";
/* One-second timeout passed to sigtimedwait() in wait_and_forward_signals(). */
struct timespec one_second = {.tv_sec = 1, .tv_nsec = 0};
/*
 * Print the help message to stderr.
 *
 * The message is emitted exactly once (a second, identical fprintf() call
 * used to print it twice). Each string literal below is one line of output.
 *
 * Returns 2, the same status this file passes to exit() when it rejects
 * invalid options.
 */
int usage() {
  fprintf(stderr,
          "%s v%s - a cron job wrapper\n"
          "Usage: %s [<OPTIONS>] [--] <COMMAND> [<ARGS>...]\n"
          "\n"
          "Known options:\n"
          "\n"
          " --help Show this help message\n"
          " --splay SECONDS Wait a random amount of time between 0 and SECONDS before\n"
          " actually starting the command.\n"
          " --timeout SECONDS\n"
          " If the command runs for more than the specified amount of\n"
          " time, terminate it. By default commands can run forever.\n"
          "\n"
          "Options controlling the command output:\n"
          "\n"
          " --quiet Only show output from the command if it fails, suppress\n"
          " all output if the exit status is 0.\n"
          " --syslog Log command output to syslog, instead of stderr.\n"
          "\n"
          "Options controlling locking behavior:\n"
          "\n"
          " -n, --name NAME Set the job name. By default, the basename of COMMAND will\n"
          " be used.\n"
          " --no-lock Disable locking, allow running multiple instances at once.\n"
          " --wait Wait for the lock to be released. If another instance of\n"
          " this command is running, wait until it terminates (or\n"
          " until the lock timeout expires).\n"
          " --lock-timeout SECONDS\n"
          " Wait at most the specified amount of time for the instance\n"
          " lock to be released, failing with an error if after SECONDS\n"
          " the lock could not be obtained. If set to 0 (the default),\n"
          " wait forever. Specifying this option implies --wait.\n"
          "\n"
          "If you are passing options to COMMAND, remember to use `--' to tell the runcron\n"
          "option parser to stop interpreting options for itself.\n"
          "\n",
          PACKAGE, PACKAGE_VERSION, PACKAGE);
  return 2;
}
......@@ -156,7 +172,7 @@ void lock_with_timeout() {
char *lockfile = NULL;
sighandler_t old_handler = NULL;
int fd, r, lock_flags;
if (asprintf(&lockfile, "%s/runcron.lock.%s", lock_dir, job_name) < 0) {
fprintf(stderr, "Error: could not allocate memory\n");
exit(1);
......@@ -182,7 +198,8 @@ void lock_with_timeout() {
if (errno == EWOULDBLOCK) {
fprintf(stderr, "Error: another instance is already running\n");
} else {
fprintf(stderr, "Error: lock could not be obtained: %s\n", strerror(errno));
fprintf(stderr, "Error: lock could not be obtained: %s\n",
strerror(errno));
}
exit(3);
}
......@@ -214,7 +231,7 @@ void setup_syslog() {
} else if (pid == 0) {
int i;
char *logger_args[] = {
"logger", "-p", "daemon.info", "-t", tag, NULL,
"logger", "-p", "daemon.info", "-t", tag, NULL,
};
/* Put the pipe on stdin, close all other fds. */
dup2(pipefd[0], 0);
......@@ -253,26 +270,109 @@ void dumpfd(int from, int to) {
/* PID of the child process that signals are sent to; -1 until it is assigned
 * (the assignment is not visible in this part of the file). */
int child_pid = -1;
/* Signal sent to the child when the run timeout expires: starts as SIGTERM
 * and is switched to SIGKILL after the first attempt. */
int termination_signal = SIGTERM;
/* Set to 1 once the timeout has fired; run() uses it to report a timeout
 * instead of a plain "terminated by signal" error. */
volatile int timeout_expired = 0;
/* parent_sigset: signal mask installed by setup_signals() and waited on with
 * sigtimedwait(). child_sigset: the mask that was in effect before, restored
 * right before execvp(). */
sigset_t parent_sigset, child_sigset;
/*
 * Send signal signo to the child process.
 *
 * Returns 0 if the signal was delivered, 1 if the child no longer exists
 * (ESRCH), and -1 on any other error after printing a message to stderr.
 * errno is preserved across the message so that the caller can report it.
 */
static int send_signal_to_child(int signo) {
  int saved_errno;

  if (kill(child_pid, signo) == 0) {
    return 0;
  }
  if (errno == ESRCH) {
    return 1;
  }
  saved_errno = errno;
  fprintf(stderr, "Error: sending signal %d to pid %d: %s\n", signo, child_pid,
          strerror(saved_errno));
  errno = saved_errno;
  return -1;
}

/*
 * Wait for the child process to terminate, forwarding signals to it.
 *
 * Waits on parent_sigset with sigtimedwait(), waking up at least once per
 * second, and reacts to the signal received:
 *
 *  - SIGCHLD: nothing to forward, just try to reap the child;
 *  - SIGALRM: the run timeout expired; send termination_signal to the child,
 *    then escalate to SIGKILL and re-arm the alarm 3 seconds ahead. SIGALRM
 *    itself is NOT forwarded to the child;
 *  - any other signal: forward it to the child unchanged.
 *
 * After every wake-up the child is polled with waitpid(WNOHANG).
 *
 * On success stores the wait status in *status and returns 0. Returns -1 if
 * sigtimedwait(), kill() or waitpid() fail, with errno left set by the
 * failing call.
 */
int wait_and_forward_signals(int *status) {
  siginfo_t sig;
  int r;

  while (1) {
    if (sigtimedwait(&parent_sigset, &sig, &one_second) < 0) {
      // EAGAIN (timeout) and EINTR just mean "no signal": go poll the child.
      if (errno != EAGAIN && errno != EINTR) {
        int saved_errno = errno;
        fprintf(stderr, "Error: unexpected error in sigtimedwait(): %s\n",
                strerror(saved_errno));
        errno = saved_errno;
        return -1;
      }
    } else {
      switch (sig.si_signo) {
      case SIGCHLD:
        // Nothing to forward, reap the child below.
        break;
      case SIGALRM:
        // Timeout expired, terminate the child process.
        r = send_signal_to_child(termination_signal);
        if (r < 0) {
          return -1;
        }
        if (r == 0) {
          // Retry in 3 seconds with SIGKILL.
          timeout_expired = 1;
          termination_signal = SIGKILL;
          alarm(3);
        }
        break;
      default:
        // Forward the signal to the child; if it is already gone (ESRCH)
        // the waitpid() below takes care of reaping it.
        if (send_signal_to_child(sig.si_signo) < 0) {
          return -1;
        }
        break;
      }
    }

    r = waitpid(child_pid, status, WNOHANG);
    if (r > 0) {
      return 0;
    }
    if (r < 0 && errno != EINTR) {
      // E.g. ECHILD: there is nothing left to wait for, so looping would
      // never end. The caller reports errno.
      return -1;
    }
  }
}
// Build parent_sigset (every signal except the ones listed in
// signals_to_skip) and install it as this process' signal mask with
// sigprocmask(SIG_SETMASK), saving the previous mask in child_sigset.
// Returns 0 on success; on failure prints an error to stderr and returns -1.
int setup_signals() {
// Signals that should bypass the main loop.
int i, signals_to_skip[] = {
SIGFPE, SIGILL, SIGSEGV, SIGBUS, SIGABRT,
SIGTRAP, SIGSYS, SIGTTIN, SIGTTOU,
};
// NOTE(review): the next four lines look like leftovers of the removed
// run_timeout_expired() SIGALRM handler, interleaved here by the diff view;
// they do not appear to be part of setup_signals() -- confirm against the
// real file.
void run_timeout_expired() {
if (child_pid > 0) {
timeout_expired = 1;
kill(child_pid, termination_signal);
// Set up the signal mask, before forking.
if (sigfillset(&parent_sigset)) {
fprintf(stderr, "Error: sigfillset() failed: %s\n", strerror(errno));
return -1;
}
// Remove from the mask the signals that must not be blocked.
for (i = 0; i < sizeof(signals_to_skip) / sizeof(int); i++) {
if (sigdelset(&parent_sigset, signals_to_skip[i])) {
fprintf(stderr, "Error: sigdelset() failed: %s\n", strerror(errno));
return -1;
}
}
// NOTE(review): the comment and the two statements below also look like
// leftovers of the removed run_timeout_expired() handler -- confirm.
/* Reset the timer 3 seconds ahead, send SIGKILL. */
termination_signal = SIGKILL;
alarm(3);
// Install the new mask; the old one is saved in child_sigset.
if (sigprocmask(SIG_SETMASK, &parent_sigset, &child_sigset)) {
fprintf(stderr, "Error: sigprocmask() failed: %s\n", strerror(errno));
return -1;
}
return 0;
}
int run(char **argv) {
char temp_name[] = "/tmp/runcron.out.XXXXXX";
int r, wait_status, exit_status = -1, logfd = -1;
if (setup_signals() < 0) {
return -1;
}
if (log_on_failure) {
logfd = mkstemp(temp_name);
if (logfd < 0) {
fprintf(stderr, "Error: could not create temporary output file: %s\n", strerror(errno));
fprintf(stderr, "Error: could not create temporary output file: %s\n",
strerror(errno));
} else {
unlink(temp_name);
}
......@@ -287,6 +387,11 @@ int run(char **argv) {
dup2(logfd, 2);
}
if (sigprocmask(SIG_SETMASK, &child_sigset, NULL)) {
fprintf(stderr, "Error: sigprocmask(): %s\n", strerror(errno));
exit(1);
}
r = execvp(argv[0], argv);
if (r < 0) {
fprintf(stderr, "Error: exec(%s): %s\n", argv[0], strerror(errno));
......@@ -294,27 +399,23 @@ int run(char **argv) {
}
} else {
if (run_timeout > 0) {
signal(SIGALRM, run_timeout_expired);
alarm(run_timeout);
}
r = waitpid(child_pid, &wait_status, 0);
r = wait_and_forward_signals(&wait_status);
if (r < 0) {
fprintf(stderr, "Error: wait(): %s\n", strerror(errno));
return 1;
} else {
if (WIFEXITED(wait_status)) {
exit_status = WEXITSTATUS(wait_status);
/**
if (exit_status != 0) {
fprintf(stderr, "Error: %s exited with status %d\n", argv[0], exit_status);
}
**/
} else if (WIFSIGNALED(wait_status)) {
if (timeout_expired) {
fprintf(stderr, "Error: %s execution timed out\n", argv[0]);
exit_status = 142;
} else {
fprintf(stderr, "Error: %s terminated by signal %d\n", argv[0], WTERMSIG(wait_status));
fprintf(stderr, "Error: %s terminated by signal %d\n", argv[0],
WTERMSIG(wait_status));
exit_status = 128 + WTERMSIG(wait_status);
}
} else {
fprintf(stderr, "Error: %s terminated for unknown reason\n", argv[0]);
......@@ -339,21 +440,20 @@ int main(int argc, char **argv) {
int option_index = 0;
const char *opt_name = NULL;
static struct option long_options[] = {
{"help", no_argument, 0, 'h'},
{"name", required_argument, 0, 'n'},
{"splay", required_argument, 0, 0},
{"timeout", required_argument, 0, 0},
{"quiet", no_argument, &log_on_failure, 1},
{"syslog", no_argument, &syslog, 1},
{"no-lock", no_argument, &locking, 0},
{"nolock", no_argument, &locking, 0},
{"lock-timeout", required_argument, 0, 0},
{"wait", no_argument, &lock_wait, 1},
{0, 0, 0, 0}
};
c = getopt_long(argc, argv, "hqn:",
long_options, &option_index);
{"help", no_argument, 0, 'h'},
{"name", required_argument, 0, 'n'},
{"splay", required_argument, 0, 0},
{"timeout", required_argument, 0, 0},
{"quiet", no_argument, &log_on_failure, 1},
{"syslog", no_argument, &syslog, 1},
{"no-lock", no_argument, &locking, 0},
{"nolock", no_argument, &locking, 0},
{"lock-timeout", required_argument, 0, 0},
{"wait", no_argument, &lock_wait, 1},
{"node-exporter-dir", required_argument, 0, 0},
{0, 0, 0, 0}};
c = getopt_long(argc, argv, "hqn:", long_options, &option_index);
if (c == -1) {
break;
}
......@@ -376,9 +476,12 @@ int main(int argc, char **argv) {
} else if (!strcmp(opt_name, "lock-timeout")) {
lock_timeout = to_int(optarg);
if (lock_timeout < 0) {
fprintf(stderr, "Error: --lock-timeout argument must be an integer\n");
fprintf(stderr,
"Error: --lock-timeout argument must be an integer\n");
exit(2);
}
} else if (!strcmp(opt_name, "node-exporter-dir")) {
node_exporter_dir = optarg;
}
break;
......@@ -402,11 +505,13 @@ int main(int argc, char **argv) {
/* Check for option consistency. */
if (!locking) {
if (lock_wait) {
fprintf(stderr, "Error: --wait and --no-lock should not be specified together\n");
fprintf(stderr,
"Error: --wait and --no-lock should not be specified together\n");
exit(2);
}
if (lock_timeout > 0) {
fprintf(stderr, "Error: --no-lock and --lock-timeout should not be specified together\n");
fprintf(stderr, "Error: --no-lock and --lock-timeout should not be "
"specified together\n");
exit(2);
}
} else {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment