Commit ed6da2b3 authored by ale

Move jobs library to a separate package

The Job and Scheduler objects are fairly generic.
parent cf0fc44a
......@@ -7,6 +7,8 @@ import (
"regexp"
"strings"
"time"
"git.autistici.org/ale/tabacco/jobs"
)
// Backup is the over-arching entity describing a high level backup
......@@ -122,9 +124,9 @@ type Repository interface {
// Manager for backups and restores.
type Manager interface {
BackupJob(context.Context, []SourceSpec) (Backup, Job, error)
BackupJob(context.Context, []SourceSpec) (Backup, jobs.Job, error)
Backup(context.Context, []SourceSpec) (Backup, error)
RestoreJob(context.Context, FindRequest, string) (Job, error)
RestoreJob(context.Context, FindRequest, string) (jobs.Job, error)
Restore(context.Context, FindRequest, string) error
Close() error
}
......@@ -5,6 +5,8 @@ import (
"fmt"
"testing"
"time"
"git.autistici.org/ale/tabacco/jobs"
)
type dummyMetadataEntry struct {
......@@ -146,12 +148,12 @@ func TestBackup(t *testing.T) {
},
},
{
Name: "source2",
Handler: "dbpipe",
AtomsScript: "echo user1 user1 ; echo user2 user2",
Name: "source2",
Handler: "dbpipe",
AtomsCommand: "echo user1 user1 ; echo user2 user2",
},
}
queueSpec := MultiQueueSpec{
queueSpec := jobs.QueueSpec{
Workers: map[string]int{"backup": 2},
}
......
......@@ -71,12 +71,12 @@ func (c *daemonCommand) Execute(ctx context.Context, f *flag.FlagSet, args ...in
return subcommands.ExitFailure
}
mgr, err := tabacco.NewManager(ctx, configMgr, store)
d, err := tabacco.NewDaemon(ctx, configMgr, store)
if err != nil {
log.Printf("error: %v", err)
return subcommands.ExitFailure
}
defer mgr.Close() // nolint
defer d.Close() // nolint
// Wait for the outmost Context to terminate (presumably due to SIGTERM).
log.Printf("backup manager started")
......
package tabacco
import (
"crypto/rand"
"encoding/binary"
"errors"
"fmt"
"io/ioutil"
......@@ -9,15 +11,19 @@ import (
"sync"
"git.autistici.org/ai3/go-common/clientutil"
"git.autistici.org/ale/tabacco/jobs"
"git.autistici.org/ale/tabacco/util"
"gopkg.in/yaml.v2"
)
var defaultSeedFile = "/var/tmp/.tabacco_scheduler_seed"
// Config is the global configuration object. While the actual
// configuration is spread over multiple files and directories, this
// holds it all together.
type Config struct {
Hostname string `yaml:"hostname"`
Queue MultiQueueSpec `yaml:"queue_config"`
Queue jobs.QueueSpec `yaml:"queue_config"`
Repository RepositorySpec `yaml:"repository"`
DryRun bool `yaml:"dry_run"`
......@@ -112,7 +118,7 @@ func ReadConfig(path string) (*Config, error) {
}
func logMultiError(prefix string, err error) {
if merr, ok := err.(*multiError); ok {
if merr, ok := err.(*util.MultiError); ok {
for _, e := range merr.Errors() {
log.Printf("%s%v", prefix, e)
}
......@@ -139,13 +145,13 @@ func foreachYAMLFile(dir string, f func(string) error) error {
if err != nil {
return err
}
merr := new(multiError)
merr := new(util.MultiError)
for _, path := range files {
if err := f(path); err != nil {
merr.Add(err)
}
}
return merr.orNil()
return merr.OrNil()
}
// ConfigManager holds all runtime data derived from the configuration
......@@ -155,6 +161,7 @@ type ConfigManager struct {
config *Config
handlerMap map[string]Handler
repo Repository
seed int64
// Listeners are notified on every reload.
notifyCh chan struct{}
......@@ -198,6 +205,12 @@ func (m *ConfigManager) Reload(config *Config) error {
return err
}
seedFile := defaultSeedFile
if config.RandomSeedFile != "" {
seedFile = config.RandomSeedFile
}
seed := mustGetSeed(seedFile)
// Update config and notify listeners (in a separate
// goroutine, that does not hold the lock).
m.mx.Lock()
......@@ -208,6 +221,7 @@ func (m *ConfigManager) Reload(config *Config) error {
m.repo = repo
m.handlerMap = handlerMap
m.config = config
m.seed = seed
m.notifyCh <- struct{}{}
return nil
}
......@@ -241,7 +255,7 @@ func (m *ConfigManager) getRepository() Repository {
return m.repo
}
func (m *ConfigManager) getQueueSpec() MultiQueueSpec {
func (m *ConfigManager) getQueueSpec() jobs.QueueSpec {
m.mx.Lock()
defer m.mx.Unlock()
return m.config.Queue
......@@ -253,8 +267,33 @@ func (m *ConfigManager) getSourceSpecs() []SourceSpec {
return m.config.SourceSpecs
}
func (m *ConfigManager) getSeedFile() string {
func (m *ConfigManager) getSeed() int64 {
m.mx.Lock()
defer m.mx.Unlock()
return m.config.RandomSeedFile
return m.seed
}
func mustGetSeed(path string) int64 {
if data, err := ioutil.ReadFile(path); err == nil && len(data) == 8 { // nolint: gosec
if seed := binary.LittleEndian.Uint64(data); seed > 0 {
return int64(seed)
}
}
seed, data := randomSeed()
if err := ioutil.WriteFile(path, data, 0600); err != nil {
log.Printf("warning: can't write random seed file: %v", err)
}
return int64(seed)
}
// Generate a random uint64, and return it along with its byte
// representation (encoding/binary, little-endian).
func randomSeed() (uint64, []byte) {
// Initialize the seed from a secure source.
var b [8]byte
if _, err := rand.Read(b[:]); err != nil { // nolint: gosec
panic(err)
}
seed := binary.LittleEndian.Uint64(b[:])
return seed, b[:]
}
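For context, a minimal round-trip sketch of the on-disk seed format used above (8 bytes, little-endian; the file path below is made up): whatever one run writes, the next run reads back unchanged, which is presumably what keeps the per-host scheduling offsets stable across restarts.
seed, data := randomSeed()
_ = ioutil.WriteFile("/tmp/example_seed", data, 0600)
raw, _ := ioutil.ReadFile("/tmp/example_seed")
restored := int64(binary.LittleEndian.Uint64(raw))
// restored == int64(seed), so mustGetSeed returns the same value on every run.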
package tabacco
import (
"context"
"fmt"
"log"
"net/http"
"git.autistici.org/ale/tabacco/jobs"
"git.autistici.org/ale/tabacco/util"
)
// Daemon holds a Manager and a Scheduler together, and runs periodic
// backup jobs for all known sources.
type Daemon struct {
mgr Manager
sched *jobs.Scheduler
}
// NewDaemon creates a new Daemon with the specified config.
func NewDaemon(ctx context.Context, configMgr *ConfigManager, ms MetadataStore) (*Daemon, error) {
mgr, err := NewManager(ctx, configMgr, ms)
if err != nil {
return nil, err
}
// Create a Scheduler, and rebuild its schedule whenever configMgr
// reloads its configuration.
sched := jobs.NewScheduler()
configMgr.Notify(func() {
schedule, err := makeSchedule(ctx, mgr, configMgr.getSourceSpecs(), configMgr.getSeed())
if err != nil {
log.Printf("error updating scheduler: %v", err)
}
if schedule != nil {
sched.SetSchedule(schedule)
}
})
return &Daemon{
mgr: mgr,
sched: sched,
}, nil
}
// Close the Daemon and all associated resources.
func (d *Daemon) Close() {
d.mgr.Close() // nolint
d.sched.Stop()
}
func (d *Daemon) startHTTPServer(addr string) error {
//http.Handle("/debug/jobs", d.mgr.StateManager)
http.Handle("/debug/sched", d.sched)
go http.ListenAndServe(addr, nil)
return nil
}
// Create a new jobs.Schedule that will trigger a separate backup for
// each configured data source that includes a 'schedule' attribute.
func makeSchedule(ctx context.Context, m Manager, sourceSpecs []SourceSpec, hostSeed int64) (*jobs.Schedule, error) {
sched := jobs.NewSchedule(ctx, hostSeed)
merr := new(util.MultiError)
var good int
for _, spec := range sourceSpecs {
if spec.Schedule == "" {
continue
}
// Bind spec to a new closure.
err := func(spec SourceSpec) error {
return sched.Add(spec.Name, spec.Schedule, func() jobs.Job {
_, j, err := m.BackupJob(ctx, []SourceSpec{spec})
if err != nil {
log.Printf("%s: can't create backup job: %v", spec.Name, err)
}
return j
})
}(spec)
if err != nil {
merr.Add(fmt.Errorf("%s: %v", spec.Name, err))
} else {
good++
}
}
// All sources failing is a fatal error; return a nil Schedule.
if good == 0 && !merr.IsNil() {
return nil, merr
}
return sched, merr.OrNil()
}
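As a hedged usage sketch, reusing the names from NewDaemon above (the handler name and schedule expression below are made up; only the field names come from this diff): only source specs with a non-empty 'schedule' attribute end up in the Schedule, the rest are silently skipped.
specs := []SourceSpec{
	{Name: "source1", Handler: "file", Schedule: "@daily"}, // scheduled
	{Name: "source2", Handler: "dbpipe"},                   // no schedule, skipped
}
schedule, err := makeSchedule(ctx, mgr, specs, configMgr.getSeed())
if err != nil {
	log.Printf("schedule warning: %v", err) // per-source failures are not fatal by themselves
}
if schedule != nil {
	sched.SetSchedule(schedule)
}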
package tabacco
package jobs
import (
"html/template"
......@@ -112,8 +112,8 @@ func init() {
// ServeHTTP implements the job status debug handler by making the
// StateManager object match the http.Handler interface.
func (j *stateManager) ServeHTTP(w http.ResponseWriter, r *http.Request) {
pending, running, done := j.getJobsStatus()
func (j *StateManager) ServeHTTP(w http.ResponseWriter, r *http.Request) {
pending, running, done := j.getStatus()
w.Header().Set("Content-Type", "text/html")
_ = debugTpl.Lookup("state_manager_debug_page").Execute(w, map[string]interface{}{
......
package tabacco
package jobs
import (
"container/list"
......@@ -6,41 +6,42 @@ import (
"errors"
"sync"
"time"
"git.autistici.org/ale/tabacco/util"
)
// Job is a task that can be run and canceled, and that has a
// string that can be used to identify successive instances of the
// same task, differing only in their execution time.
// Job is a task that can be run and canceled, and that has a string
// that can be used to identify successive instances of the same task,
// differing only in their execution time (so keys don't have to be
// unique). It's basically a glorified goroutine wrapper with a
// cancelable Context.
type Job interface {
Key() string
RunContext(context.Context) error
Cancel()
Wait() error
}
// A base job that is just a function. It can be canceled, and the
// result can be waited upon.
type baseJob struct {
fn func(context.Context) error
key string
done chan struct{}
err error
// Adds a Cancel method to a job.
type cancelableJob struct {
Job
done chan struct{}
err error
cancelMx sync.Mutex
cancel context.CancelFunc
}
func newJob(key string, fn func(context.Context) error) *baseJob {
return &baseJob{
fn: fn,
key: key,
// WithCancel wraps a job with a Context that can be canceled by
// calling the Cancel() method. It also implements its own Wait()
// method, so you don't have to.
func WithCancel(j Job) Job {
return &cancelableJob{
Job: j,
done: make(chan struct{}),
}
}
func (j *baseJob) Key() string { return j.key }
func (j *baseJob) RunContext(ctx context.Context) error {
func (j *cancelableJob) RunContext(ctx context.Context) error {
defer close(j.done)
j.cancelMx.Lock()
innerCtx, cancel := context.WithCancel(ctx)
......@@ -48,11 +49,11 @@ func (j *baseJob) RunContext(ctx context.Context) error {
defer cancel()
j.cancelMx.Unlock()
j.err = j.fn(innerCtx)
j.err = j.Job.RunContext(innerCtx)
return j.err
}
func (j *baseJob) Cancel() {
func (j *cancelableJob) Cancel() {
j.cancelMx.Lock()
if j.cancel != nil {
j.cancel()
......@@ -60,18 +61,40 @@ func (j *baseJob) Cancel() {
j.cancelMx.Unlock()
}
func (j *baseJob) Wait() error {
func (j *cancelableJob) Wait() error {
<-j.done
return j.err
}
// A bare job that just wraps a function. Cancellation and Wait
// semantics come from wrapping it with WithCancel (see JobFunc).
type funcJob struct {
fn func(context.Context) error
}
// JobFunc creates a new cancelable Job that wraps a function call.
func JobFunc(fn func(context.Context) error) Job {
return WithCancel(&funcJob{
fn: fn,
})
}
func (j *funcJob) Cancel() {}
func (j *funcJob) Wait() error { return errors.New("Wait not implemented") }
func (j *funcJob) RunContext(ctx context.Context) error {
return j.fn(ctx)
}
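For illustration, a minimal sketch of the new exported API (import path as added in this commit; the function body is just a stand-in for real work): JobFunc supplies working Cancel and Wait semantics around a plain function.
j := jobs.JobFunc(func(ctx context.Context) error {
	select {
	case <-time.After(time.Minute): // stand-in for real work
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
})
go func() { _ = j.RunContext(context.Background()) }()
j.Cancel() // cancels the inner context created by WithCancel
if err := j.Wait(); err != nil { // blocks until RunContext returns
	log.Printf("job failed: %v", err)
}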
// A job that runs with a timeout.
type timeoutJob struct {
Job
timeout time.Duration
}
func withTimeout(j Job, timeout time.Duration) Job {
// WithTimeout wraps a job with a timeout, after which the job is
// canceled.
func WithTimeout(j Job, timeout time.Duration) Job {
return &timeoutJob{Job: j, timeout: timeout}
}
......@@ -81,28 +104,33 @@ func (j *timeoutJob) RunContext(ctx context.Context) error {
return j.Job.RunContext(innerCtx)
}
// Manager that keeps an exclusive lock by Key of running jobs, and
// can apply an overrun policy to them: upon starting a new job, when
// a previous job with the same key exists, it can evict the previous
// job or abort starting the new one.
type exclusiveLockManager struct {
// ExclusiveLockManager keeps an exclusive lock of running jobs by
// Key, and can apply an overrun policy to them: upon starting a new
// job, when a previous job with the same key exists, it can evict the
// previous job or abort starting the new one.
type ExclusiveLockManager struct {
mx sync.Mutex
locked map[string]Job
}
func newExclusiveLockManager() *exclusiveLockManager {
return &exclusiveLockManager{
// NewExclusiveLockManager returns a new ExclusiveLockManager.
func NewExclusiveLockManager() *ExclusiveLockManager {
return &ExclusiveLockManager{
locked: make(map[string]Job),
}
}
func (m *exclusiveLockManager) withExclusiveLock(j Job, lockKey string, killAndRun bool) Job {
// WithExclusiveLock wraps a Job with an exclusive lock, so that no
// more than one job with this key may be running at any given
// time. The killAndRun flag selects the desired overrun policy when a
// second task is started.
func (m *ExclusiveLockManager) WithExclusiveLock(j Job, lockKey string, killAndRun bool) Job {
return &exclusiveJob{Job: j, mgr: m, lockKey: lockKey, killAndRun: killAndRun}
}
type exclusiveJob struct {
Job
mgr *exclusiveLockManager
mgr *ExclusiveLockManager
lockKey string
killAndRun bool
}
......@@ -130,28 +158,31 @@ func (j *exclusiveJob) RunContext(ctx context.Context) error {
return err
}
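Continuing that sketch, the wrappers compose; the lock key and timeout below are arbitrary, and killAndRun is set to false on the assumption that this means "do not start if an instance is already running" rather than evicting it.
lockMgr := jobs.NewExclusiveLockManager()
j = jobs.WithTimeout(j, 2*time.Hour) // give up on jobs that run too long
j = lockMgr.WithExclusiveLock(j, "backup/source1", false)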
// Manager that can limit the number of jobs by tag. Each tag
// corresponds to a separate queue, limited to a certain number of
// QueueManager can limit the number of jobs by user-defined tag. Each
// tag corresponds to a separate queue, limited to a certain number of
// parallel tasks. By default (or for unknown queues) there is no
// limit.
type multiQueueManager struct {
type QueueManager struct {
queues map[string]chan struct{}
}
// MultiQueueSpec describes the configuration of named queues.
type MultiQueueSpec struct {
// QueueSpec describes the configuration of named queues.
type QueueSpec struct {
Workers map[string]int `yaml:"workers"`
}
func newMultiQueueManager(spec MultiQueueSpec) *multiQueueManager {
// NewQueueManager returns a new QueueManager with the provided
// configuration.
func NewQueueManager(spec QueueSpec) *QueueManager {
q := make(map[string]chan struct{})
for name, n := range spec.Workers {
q[name] = make(chan struct{}, n)
}
return &multiQueueManager{queues: q}
return &QueueManager{queues: q}
}
func (m *multiQueueManager) withQueue(j Job, queue string) Job {
// WithQueue wraps a job with a concurrency limit controller.
func (m *QueueManager) WithQueue(j Job, queue string) Job {
ch, ok := m.queues[queue]
if !ok {
ch, ok = m.queues["default"]
......@@ -178,87 +209,95 @@ func (j *queuedJob) RunContext(ctx context.Context) error {
return err
}
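And the queue limiter, using the same queue name as the test above: at most two such jobs run at once, while names that don't match any configured queue fall back to the "default" queue (no limit unless one is configured).
qm := jobs.NewQueueManager(jobs.QueueSpec{
	Workers: map[string]int{"backup": 2},
})
j = qm.WithQueue(j, "backup")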
// Enum for possible job states as tracked by the StateManager.
const (
jobStatusPending = iota
jobStatusRunning
jobStatusDone
JobStatusPending = iota
JobStatusRunning
JobStatusDone
)
type jobStatusEnum int
// JobStatus is an enum representing the state of a job.
type JobStatus int
func (s jobStatusEnum) String() string {
// String returns a text representation of the job state.
func (s JobStatus) String() string {
switch s {
case jobStatusPending:
case JobStatusPending:
return "PENDING"
case jobStatusRunning:
case JobStatusRunning:
return "RUNNING"
case jobStatusDone:
case JobStatusDone:
return "DONE"
default:
return "UNKNOWN"
}
}
// JobStatus represents the current state of a job.
type JobStatus struct {
// Status holds information on the current state of a job.
type Status struct {
ID string
Status jobStatusEnum
Status JobStatus
StartedAt time.Time
CompletedAt time.Time
Err error
Job Job
}
// Manager that adds a state to jobs and keeps track of it. This is
// basically a way to keep track of running goroutines at the level of
// granularity that we desire.
type stateManager struct {
// StateManager adds a state and ID to jobs and keeps track of them
// after they have run. This is basically a way to keep track of
// running goroutines at the level of granularity that we desire.
//
// It has no practical effect on the jobs themselves, it's just a way
// to provide the user with debugging and auditing information.
type StateManager struct {
mx sync.Mutex
pending map[string]*JobStatus
running map[string]*JobStatus
pending map[string]*Status
running map[string]*Status
done *list.List
}
func newStateManager() *stateManager {
return &stateManager{
pending: make(map[string]*JobStatus),
running: make(map[string]*JobStatus),
// How many past executions to keep in memory.
const logsToKeep = 100
// NewStateManager creates a new StateManager.
func NewStateManager() *StateManager {
return &StateManager{
pending: make(map[string]*Status),
running: make(map[string]*Status),
done: list.New(),
}
}
func (m *stateManager) setStatusPending(jobID string, j Job) {
func (m *StateManager) setStatusPending(jobID string, j Job) {
m.mx.Lock()
defer m.mx.Unlock()
m.pending[jobID] = &JobStatus{
m.pending[jobID] = &Status{
ID: jobID,
Status: jobStatusPending,
Status: JobStatusPending,
Job: j,
}
}
func (m *stateManager) setStatusRunning(jobID string) {
func (m *StateManager) setStatusRunning(jobID string) {
m.mx.Lock()
defer m.mx.Unlock()
js := m.pending[jobID]
js.StartedAt = time.Now()
js.Status = jobStatusRunning
js.Status = JobStatusRunning
delete(m.pending, jobID)
m.running[jobID] = js
}
const logsToKeep = 100
func (m *stateManager) setStatusDone(jobID string, err error) {
func (m *StateManager) setStatusDone(jobID string, err error) {
m.mx.Lock()
defer m.mx.Unlock()
js := m.running[jobID]
delete(m.running, jobID)
js.CompletedAt = time.Now()
js.Status = jobStatusDone
js.Status = JobStatusDone
js.Err = err
m.done.PushFront(js)
......@@ -267,34 +306,38 @@ func (m *stateManager) setStatusDone(jobID string, err error) {
}
}
func (m *stateManager) withTrackedStatus(j Job) Job {
// WithStatus tracks a job through its lifetime.
func (m *StateManager) WithStatus(j Job) Job {
sj := &statusJob{
Job: j,
id: randomID(),
id: util.RandomID(),
mgr: m,
}
m.setStatusPending(sj.id, sj)
return sj
}
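Finally, status tracking and the debug page for the same job; the handler path mirrors the line commented out in daemon.go above and is otherwise an assumption.
sm := jobs.NewStateManager()
j = sm.WithStatus(j)           // the job now appears as PENDING until it runs
http.Handle("/debug/jobs", sm) // *StateManager implements http.Handler via ServeHTTP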
func jobStatusMapToSlice(m map[string]*JobStatus) []JobStatus {
out := make([]JobStatus, 0, len(m))
func jobStatusMapToSlice(m map[string]*Status) []Status {
out := make([]Status, 0, len(m))
for _, js := range m {
out = append(out, *js)
}
return out
}
func jobStatusListToSlice(l *list.List) []JobStatus {
out := make([]JobStatus, 0, l.Len())
func jobStatusListToSlice(l *list.List) []Status {
out := make([]Status, 0, l.Len())
for e := l.Front(); e != nil; e = e.Next() {
js := e.Value.(*JobStatus)
js := e.Value.(*Status)
out = append(out, *js)