Commit 04055321 authored by ale's avatar ale

Merge branch 'v2' into 'master'

V2

See merge request !1
parents a277f63f 25f279d0
Pipeline #3491 failed with stages
in 1 minute and 23 seconds
......@@ -12,6 +12,7 @@ test:
image: "ai/test:go"
script:
- "./install_restic_for_tests.sh"
- "apt-get install -y liblz4-tool"
- "go-test-runner ."
except:
- master
......
......@@ -35,7 +35,8 @@ func NewAgent(ctx context.Context, configMgr *ConfigManager, ms MetadataStore) (
case <-stopCh:
return
case <-notifyCh:
schedule, err := makeSchedule(ctx, mgr, configMgr.getSourceSpecs(), configMgr.getSeed())
config := configMgr.current()
schedule, err := makeSchedule(ctx, mgr, config.SourceSpecs(), config.Seed())
if err != nil {
log.Printf("error updating scheduler: %v", err)
}
......@@ -63,7 +64,7 @@ func (a *Agent) Close() {
// Create a new jobs.Schedule that will trigger a separate backup for
// each configured data source that includes a 'schedule' attribute.
func makeSchedule(ctx context.Context, m Manager, sourceSpecs []SourceSpec, hostSeed int64) (*jobs.Schedule, error) {
func makeSchedule(ctx context.Context, m Manager, sourceSpecs []*SourceSpec, hostSeed int64) (*jobs.Schedule, error) {
sched := jobs.NewSchedule(ctx, hostSeed)
merr := new(util.MultiError)
var good int
......@@ -72,9 +73,9 @@ func makeSchedule(ctx context.Context, m Manager, sourceSpecs []SourceSpec, host
continue
}
// Bind spec to a new closure.
err := func(spec SourceSpec) error {
err := func(spec *SourceSpec) error {
return sched.Add(spec.Name, spec.Schedule, func() jobs.Job {
_, j, err := m.BackupJob(ctx, []SourceSpec{spec})
_, j, err := m.BackupJob(ctx, spec)
if err != nil {
log.Printf("%s: can't create backup job: %v", spec.Name, err)
}
......
......@@ -9,19 +9,19 @@ import (
type fakeManager struct{}
func (m *fakeManager) BackupJob(context.Context, []SourceSpec) (Backup, jobs.Job, error) {
return Backup{}, nil, nil
func (m *fakeManager) BackupJob(context.Context, *SourceSpec) (*Backup, jobs.Job, error) {
return &Backup{}, nil, nil
}
func (m *fakeManager) Backup(context.Context, []SourceSpec) (Backup, error) {
return Backup{}, nil
func (m *fakeManager) Backup(context.Context, *SourceSpec) (*Backup, error) {
return &Backup{}, nil
}
func (m *fakeManager) RestoreJob(context.Context, FindRequest, string) (jobs.Job, error) {
func (m *fakeManager) RestoreJob(context.Context, *FindRequest, string) (jobs.Job, error) {
return nil, nil
}
func (m *fakeManager) Restore(context.Context, FindRequest, string) error {
func (m *fakeManager) Restore(context.Context, *FindRequest, string) error {
return nil
}
......@@ -34,27 +34,31 @@ func (m *fakeManager) GetStatus() ([]jobs.Status, []jobs.Status, []jobs.Status)
}
func TestMakeSchedule(t *testing.T) {
sourceSpecs := []SourceSpec{
{
Name: "source1",
sourceSpecs := []*SourceSpec{
&SourceSpec{
Name: "source1/users",
Handler: "file1",
Schedule: "@random_every 1d",
Atoms: []Atom{
{
Name: "user1",
RelativePath: "user1",
},
{
Name: "user2",
RelativePath: "user2",
Datasets: []*DatasetSpec{
&DatasetSpec{
Atoms: []Atom{
{
Name: "user1",
Path: "user1",
},
{
Name: "user2",
Path: "user2",
},
},
},
},
},
{
Name: "source2",
Handler: "dbpipe",
Schedule: "35 3 * * *",
AtomsCommand: "echo user1 user1 ; echo user2 user2",
&SourceSpec{
Name: "source2",
Handler: "dbpipe",
Schedule: "35 3 * * *",
DatasetsCommand: "echo user1 user1 ; echo user2 user2",
},
}
......
package main
import (
"context"
"encoding/json"
"errors"
"flag"
"log"
"os"
"time"
"github.com/google/subcommands"
"git.autistici.org/ai3/tools/tabacco"
mdbc "git.autistici.org/ai3/tools/tabacco/metadb/client"
)
// rpcTimeout is the deadline applied to the FindAtoms RPC issued by the
// query command.
var rpcTimeout = 120 * time.Second
// queryCommand implements the "query" subcommand, which looks up atoms
// in the backup metadata database. Its fields are populated from
// command-line flags by SetFlags.
type queryCommand struct {
// Path of the configuration file (-config flag).
configPath string
// Host to filter results by; empty by default (-host flag).
host string
// Number of most recent versions to return (-num-versions flag).
numVersions int
}
// Name returns the name under which the command is invoked.
func (c *queryCommand) Name() string {
	return "query"
}
// Synopsis returns a one-line description of the command.
func (c *queryCommand) Synopsis() string {
	return "query the backup metadata database"
}
// Usage returns the long help text of the command: the invocation
// syntax followed by a short description.
func (c *queryCommand) Usage() string {
return `query [<flags>] <atom_pattern>
Query the backup metadata database.
`
}
// SetFlags registers the flags understood by the query command on f,
// binding each one to the corresponding field of c.
func (c *queryCommand) SetFlags(f *flag.FlagSet) {
	// Location of the configuration file.
	f.StringVar(&c.configPath, "config", "/etc/tabacco/agent.yml", "configuration `file`")

	// Query parameters.
	f.IntVar(&c.numVersions, "num-versions", 1, "return the most recent `N` versions")
	f.StringVar(&c.host, "host", "", "filter by host")
}
// buildRequest assembles a FindRequest from the parsed flags and the
// single positional argument (the atom pattern). It returns an error
// unless exactly one positional argument was given.
func (c *queryCommand) buildRequest(f *flag.FlagSet) (*tabacco.FindRequest, error) {
	if nargs := f.NArg(); nargs != 1 {
		return nil, errors.New("error: wrong number of arguments")
	}

	req := new(tabacco.FindRequest)
	req.Pattern = f.Arg(0)
	req.Host = c.host
	req.NumVersions = c.numVersions
	return req, nil
}
// Execute runs the query command: it builds a FindRequest from the
// command line, reads the configuration, connects to the metadata
// store, issues a FindAtoms RPC bounded by rpcTimeout and prints the
// result to standard output as indented JSON.
//
// It returns ExitUsageError when the command line is invalid,
// ExitFailure when any later step fails (including encoding or writing
// the result), and ExitSuccess otherwise. The extra args are unused.
func (c *queryCommand) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus {
	req, err := c.buildRequest(f)
	if err != nil {
		log.Printf("error in request: %v", err)
		return subcommands.ExitUsageError
	}

	// Parse configuration and connect to the metadata store.
	config, err := tabacco.ReadConfig(c.configPath)
	if err != nil {
		log.Printf("error reading config: %v", err)
		return subcommands.ExitFailure
	}
	store, err := mdbc.New(config.MetadataStoreBackend)
	if err != nil {
		log.Printf("error in metadata client config: %v", err)
		return subcommands.ExitFailure
	}

	// Make the RPC.
	rctx, cancel := context.WithTimeout(ctx, rpcTimeout)
	defer cancel()
	result, err := store.FindAtoms(rctx, req)
	if err != nil {
		log.Printf("FindAtoms() error: %v", err)
		return subcommands.ExitFailure
	}

	// Do not report success unless the result was actually encoded
	// and written out.
	data, err := json.MarshalIndent(result, "", " ")
	if err != nil {
		log.Printf("error encoding result: %v", err)
		return subcommands.ExitFailure
	}
	if _, err := os.Stdout.Write(data); err != nil {
		log.Printf("error writing result: %v", err)
		return subcommands.ExitFailure
	}
	return subcommands.ExitSuccess
}
func init() {
subcommands.Register(&queryCommand{}, "")
}
......@@ -77,8 +77,8 @@ func (c *restoreCommand) buildRestoreJob(ctx context.Context, mgr tabacco.Manage
return jobs.AsyncGroup(restoreJobs), nil
}
func (c *restoreCommand) newFindRequest(s string) tabacco.FindRequest {
return tabacco.FindRequest{
func (c *restoreCommand) newFindRequest(s string) *tabacco.FindRequest {
return &tabacco.FindRequest{
Pattern: s,
}
}
......
......@@ -23,7 +23,7 @@ var defaultSeedFile = "/var/tmp/.tabacco_scheduler_seed"
// holds it all together.
type Config struct {
Hostname string `yaml:"hostname"`
Queue jobs.QueueSpec `yaml:"queue_config"`
Queue *jobs.QueueSpec `yaml:"queue_config"`
Repository RepositorySpec `yaml:"repository"`
DryRun bool `yaml:"dry_run"`
DefaultNiceLevel int `yaml:"default_nice_level"`
......@@ -32,43 +32,83 @@ type Config struct {
RandomSeedFile string `yaml:"random_seed_file"`
MetadataStoreBackend *clientutil.BackendConfig `yaml:"metadb"`
HandlerSpecs []HandlerSpec
SourceSpecs []SourceSpec
HandlerSpecs []*HandlerSpec
SourceSpecs []*SourceSpec
}
type runtimeAssets struct {
handlerMap map[string]Handler
repo Repository
seed int64
shell *Shell
// RuntimeContext provides access to runtime objects whose lifetime is
// ultimately tied to the configuration. Configuration can change
// during the lifetime of the process, but we want backup jobs to have
// a consistent view of the configuration while they execute, so
// access to the current version of the configuration is mediated by
// the ConfigManager.
type RuntimeContext interface {
Shell() *Shell
Repo() Repository
QueueSpec() *jobs.QueueSpec
Seed() int64
WorkDir() string
SourceSpecs() []*SourceSpec
FindSource(string) *SourceSpec
HandlerSpec(string) *HandlerSpec
Close()
}
func (a *runtimeAssets) Close() {
// The set of objects that are created from a Config and that the main
// code cares about.
type parsedConfig struct {
handlerMap map[string]*HandlerSpec
sourceSpecs []*SourceSpec
sourceSpecsByName map[string]*SourceSpec
queue *jobs.QueueSpec
repo Repository
seed int64
shell *Shell
workDir string
}
func (a *parsedConfig) Close() {
a.repo.Close() // nolint
}
func buildHandlerMap(specs []HandlerSpec, shell *Shell) (map[string]Handler, error) {
m := make(map[string]Handler)
merr := new(util.MultiError)
for _, spec := range specs {
h, err := spec.Parse(shell)
if err != nil {
merr.Add(err)
continue
}
m[spec.Name] = h
func (a *parsedConfig) Shell() *Shell { return a.shell }
func (a *parsedConfig) Repo() Repository { return a.repo }
func (a *parsedConfig) QueueSpec() *jobs.QueueSpec { return a.queue }
func (a *parsedConfig) Seed() int64 { return a.seed }
func (a *parsedConfig) WorkDir() string { return a.workDir }
func (a *parsedConfig) SourceSpecs() []*SourceSpec { return a.sourceSpecs }
func (a *parsedConfig) HandlerSpec(name string) *HandlerSpec {
return a.handlerMap[name]
}
func (a *parsedConfig) FindSource(name string) *SourceSpec {
return a.sourceSpecsByName[name]
}
func buildHandlerMap(specs []*HandlerSpec) map[string]*HandlerSpec {
// Create a handler map with a default 'file' spec.
m := map[string]*HandlerSpec{
"file": &HandlerSpec{
Name: "file",
Type: "file",
},
}
for _, h := range specs {
m[h.Name] = h
}
return m, merr.OrNil()
return m
}
func (c *Config) parse() (*runtimeAssets, error) {
func (c *Config) parse() (*parsedConfig, error) {
shell := NewShell(c.DryRun)
shell.SetNiceLevel(c.DefaultNiceLevel)
shell.SetIOClass(c.DefaultIOClass)
// Parse the repository config. An error here is fatal, as we
// don't have a way to operate without a repository.
repo, err := c.Repository.Parse(shell)
repo, err := c.Repository.Parse()
if err != nil {
return nil, err
}
......@@ -76,21 +116,26 @@ func (c *Config) parse() (*runtimeAssets, error) {
merr := new(util.MultiError)
// Build the handlers.
handlerMap, err := buildHandlerMap(c.HandlerSpecs, shell)
if err != nil {
merr.Add(err)
}
handlerMap := buildHandlerMap(c.HandlerSpecs)
// Validate the sources (Parse is called later at runtime).
var srcs []SourceSpec
// Sources that fail the check are removed from the
// SourceSpecs array. We also check that sources have unique
// names.
srcMap := make(map[string]*SourceSpec)
var srcs []*SourceSpec
for _, spec := range c.SourceSpecs {
if err := spec.Check(handlerMap); err != nil {
merr.Add(err)
merr.Add(fmt.Errorf("source %s: %v", spec.Name, err))
continue
}
if _, ok := srcMap[spec.Name]; ok {
merr.Add(fmt.Errorf("duplicated source %s", spec.Name))
continue
}
srcMap[spec.Name] = spec
srcs = append(srcs, spec)
}
c.SourceSpecs = srcs
// Read (or create) the seed file.
seedFile := defaultSeedFile
......@@ -99,28 +144,32 @@ func (c *Config) parse() (*runtimeAssets, error) {
}
seed := mustGetSeed(seedFile)
return &runtimeAssets{
shell: shell,
repo: repo,
handlerMap: handlerMap,
seed: seed,
return &parsedConfig{
handlerMap: handlerMap,
sourceSpecs: srcs,
sourceSpecsByName: srcMap,
queue: c.Queue,
shell: shell,
repo: repo,
seed: seed,
workDir: c.WorkDir,
}, merr.OrNil()
}
// The following functions read YAML files from .d-style directories. To be nice
// to the user, each file can contain either a single object or a list of
// multiple objects.
func readHandlersFromDir(dir string) ([]HandlerSpec, error) {
var out []HandlerSpec
func readHandlersFromDir(dir string) ([]*HandlerSpec, error) {
var out []*HandlerSpec
err := foreachYAMLFile(dir, func(path string) error {
var specs []HandlerSpec
var specs []*HandlerSpec
log.Printf("reading handler: %s", path)
if err := readYAMLFile(path, &specs); err != nil {
var spec HandlerSpec
if err := readYAMLFile(path, &spec); err != nil {
return err
}
specs = []HandlerSpec{spec}
specs = append(specs, &spec)
}
out = append(out, specs...)
return nil
......@@ -128,17 +177,17 @@ func readHandlersFromDir(dir string) ([]HandlerSpec, error) {
return out, err
}
func readSourcesFromDir(dir string) ([]SourceSpec, error) {
var out []SourceSpec
func readSourcesFromDir(dir string) ([]*SourceSpec, error) {
var out []*SourceSpec
err := foreachYAMLFile(dir, func(path string) error {
var specs []SourceSpec
var specs []*SourceSpec
log.Printf("reading source: %s", path)
if err := readYAMLFile(path, &specs); err != nil {
var spec SourceSpec
if err := readYAMLFile(path, &spec); err != nil {
return err
}
specs = []SourceSpec{spec}
specs = append(specs, &spec)
}
out = append(out, specs...)
return nil
......@@ -215,6 +264,7 @@ func foreachYAMLFile(dir string, f func(string) error) error {
merr := new(util.MultiError)
for _, path := range files {
if err := f(path); err != nil {
log.Printf("error loading yaml file %s: %v", path, err)
merr.Add(err)
}
}
......@@ -228,8 +278,7 @@ func foreachYAMLFile(dir string, f func(string) error) error {
// unregister).
type ConfigManager struct {
mx sync.Mutex
config *Config
assets *runtimeAssets
parsed *parsedConfig
// Listeners are notified on every reload.
notifyCh chan struct{}
......@@ -246,12 +295,14 @@ func NewConfigManager(config *Config) (*ConfigManager, error) {
}
go func() {
for range m.notifyCh {
m.mx.Lock()
for _, lch := range m.listeners {
select {
case lch <- struct{}{}:
default:
}
}
m.mx.Unlock()
}
}()
return m, nil
......@@ -260,22 +311,23 @@ func NewConfigManager(config *Config) (*ConfigManager, error) {
// Reload the configuration (at least, the parts of it that can be
// dynamically reloaded).
func (m *ConfigManager) Reload(config *Config) error {
assets, err := config.parse()
if assets == nil {
parsed, err := config.parse()
if parsed == nil {
return err
} else if err != nil {
log.Printf("warning: errors in configuration: %v", err)
}
// Update config and notify listeners (in a separate
// goroutine, that does not hold the lock).
m.mx.Lock()
defer m.mx.Unlock()
if m.assets != nil {
m.assets.Close() // nolint
if m.parsed != nil {
m.parsed.Close() // nolint
}
log.Printf("loaded new config: %d handlers, %d sources", len(assets.handlerMap), len(config.SourceSpecs))
m.assets = assets
m.config = config
log.Printf("loaded new config: %d handlers, %d sources", len(parsed.handlerMap), len(parsed.sourceSpecs))
m.parsed = parsed
m.notifyCh <- struct{}{}
return nil
}
......@@ -284,8 +336,8 @@ func (m *ConfigManager) Reload(config *Config) error {
func (m *ConfigManager) Close() {
m.mx.Lock()
close(m.notifyCh)
if m.assets != nil {
m.assets.Close()
if m.parsed != nil {
m.parsed.Close()
}
m.mx.Unlock()
}
......@@ -303,47 +355,16 @@ func (m *ConfigManager) Notify() <-chan struct{} {
return ch
}
func (m *ConfigManager) getHandler(name string) (Handler, bool) {
m.mx.Lock()
defer m.mx.Unlock()
h, ok := m.assets.handlerMap[name]
return h, ok
}
func (m *ConfigManager) getRepository() Repository {
m.mx.Lock()
defer m.mx.Unlock()
return m.assets.repo
}
func (m *ConfigManager) getQueueSpec() jobs.QueueSpec {
m.mx.Lock()
defer m.mx.Unlock()
return m.config.Queue
}
func (m *ConfigManager) getSourceSpecs() []SourceSpec {
m.mx.Lock()
defer m.mx.Unlock()
return m.config.SourceSpecs
}
func (m *ConfigManager) getSeed() int64 {
m.mx.Lock()
defer m.mx.Unlock()
return m.assets.seed
}
func (m *ConfigManager) getShell() *Shell {
m.mx.Lock()
defer m.mx.Unlock()
return m.assets.shell
// NewRuntimeContext returns a new RuntimeContext, capturing current
// configuration and runtime assets.
func (m *ConfigManager) NewRuntimeContext() RuntimeContext {
return m.current()
}
func (m *ConfigManager) getWorkDir() string {
func (m *ConfigManager) current() *parsedConfig {
m.mx.Lock()
defer m.mx.Unlock()
return m.config.WorkDir
return m.parsed
}
func mustGetSeed(path string) int64 {
......
......@@ -4,9 +4,10 @@ import (
"context"
"errors"
"fmt"
"log"
"os"
"strings"
"testing"
"time"
)
func TestReadConfig(t *testing.T) {
......@@ -27,6 +28,7 @@ func TestConfigManager(t *testing.T) {
if err != nil {
t.Fatal("ReadConfig()", err)
}
log.Printf("loaded %d sources", len(conf.SourceSpecs))
mgr, err := NewConfigManager(conf)
if err != nil {
t.Fatal("NewConfigManager()", err)
......@@ -34,13 +36,18 @@ func TestConfigManager(t *testing.T) {
defer mgr.Close()
// Test one of the accessor methods.
if s := mgr.getSourceSpecs(); len(s) != 1 {
t.Fatalf("getSourceSpecs() bad result: %+v", s)
if s := mgr.current().SourceSpecs(); len(s) != 1 {
t.Fatalf("current().SourceSpecs() bad result: %+v", s)
}
// Test the Notify() mechanism by checking that it triggers
// right away when setting up a new listener.
<-mgr.Notify()
tmr := time.NewTimer(1 * time.Second)
select {
case <-mgr.Notify():
case <-tmr.C:
t.Fatal("Notify() channel did not trigger")
}
}
func TestRandomSeed(t *testing.T) {
......@@ -61,7 +68,7 @@ func TestConfig_Parse(t *testing.T) {
type testdata struct {
config *Config
expectedOK bool
checkFn func(*runtimeAssets, []Dataset) error
checkFn func([]*Dataset) error
}
tdd := []testdata{
// The following tests cover a few ways to generate
......@@ -69,24 +76,27 @@ func TestConfig_Parse(t *testing.T) {
// the README.
{
&Config{
SourceSpecs: []SourceSpec{
{
Name: "users/account1",
Handler: "file",
Atoms: []Atom{
{RelativePath: "/data/account1"},
},
},
{
Name: "users/account2",
Handler: "file",
Atoms: []Atom{
{RelativePath: "/data/account2"},
SourceSpecs: []*SourceSpec{
&SourceSpec{
Name: "users",
Handler: "file",
Schedule: "@random_every 24h",
Datasets: []*DatasetSpec{
&DatasetSpec{
Atoms: []Atom{
{Name: "account1"},
},
},
{
Atoms: []Atom{
{Name: "account2"},
},
},
},
},
},
HandlerSpecs: []HandlerSpec{
{
HandlerSpecs: []*HandlerSpec{
&HandlerSpec{
Name: "file",
Type: "file",
Params: map[string]interface{}{"path": "/"},
......@@ -98,18 +108,23 @@ func TestConfig_Parse(t *testing.T) {
},
{
&Config{
SourceSpecs: []SourceSpec{
{
Name: "users",
Handler: "file",
Atoms: []Atom{
{Name: "account1"},
{Name: "account2"},
SourceSpecs: []*SourceSpec{
&SourceSpec{
Name: "users",
Handler: "file",
Schedule: "@random_every 24h",
Datasets: []*DatasetSpec{
&DatasetSpec{
Atoms: []Atom{
{Name: "account1"},
{Name: "account2"},
},
},
},
},
},
HandlerSpecs: []HandlerSpec{
{
HandlerSpecs: []*HandlerSpec{
&HandlerSpec{
Name: "file",
Type: "file",
Params: map[string]interface{}{"path": "/data"},
......@@ -121,15 +136,16 @@ func TestConfig_Parse(t *testing.T) {
},
{
&Config{
SourceSpecs: []SourceSpec{
{
Name: "users",
Handler: "file",
AtomsCommand: "echo account1; echo account2",
SourceSpecs: []*SourceSpec{
&SourceSpec{
Name: "users",
Handler: "file",
Schedule: "@random_every 24h",
DatasetsCommand: "echo '[{atoms: [{name: account1}, {name: account2}]}]'",
},
},
HandlerSpecs: []HandlerSpec{
{
HandlerSpecs: []*HandlerSpec{
&HandlerSpec{
Name: "file",
Type: "file",
Params: map[string]interface{}{"path": "/data"},
......@@ -150,53 +166,51 @@ func TestConfig_Parse(t *testing.T) {
"password": "hello",
}
ra, err := td.config.parse()
parsed, err := td.config.parse()
if err != nil && td.expectedOK {
t.Errorf("unexpected error for config %+v: %v", td.config, err)
} else if err == nil && !td.expectedOK {
t.Errorf("missing error for config %+v", td.config)
} else {
datasets, err := parseAllSources(ra, td.config.SourceSpecs)
datasets, err := parseAllSources(parsed.SourceSpecs())
if err != nil {
t.Errorf("failed to parse sources %+v: %v", td.config.SourceSpecs, err)
}
if td.checkFn != nil {
if err := td.checkFn(ra, datasets); err != nil {
if err := td.checkFn(datasets); err != nil {
t.Errorf("check failed for config %+v: %v", td.config, err)
}
}
}
if ra != nil {
ra.Close()
if parsed != nil {
parsed.Close()
}
}
}
func parseAllSources(ra *runtimeAssets, specs []SourceSpec) ([]Dataset, error) {
var out []Dataset
func parseAllSources(specs []*SourceSpec) ([]*Dataset, error) {
var out []*Dataset
for _, spec := range specs {
ds, err := spec.Parse(context.Background()) <