From 35f8a72e80d10dc5aa589ab2e89654b502fde4c2 Mon Sep 17 00:00:00 2001 From: ale Date: Mon, 17 Jun 2019 22:36:04 +0100 Subject: [PATCH 01/12] Refactor, take one Try to make things simpler and more clear. Specifically: * clarify the semantics of Source, Dataset and Atom * separate the namespace of Atom paths from the Restic namespace * simplify the command execution by delegating pipes to /bin/sh for efficiency There's still a bunch to do. --- agent.go | 6 +- agent_test.go | 47 ++-- cmd/tabacco/restore.go | 4 +- config.go | 92 ++++--- config_test.go | 102 ++++---- handler_file.go | 68 ++--- handler_pipe.go | 136 ++++------ handlers.go | 25 +- manager.go | 247 ++++++++++--------- manager_test.go | 121 +++++---- metadb/client/client.go | 18 +- metadb/migrations/1_initialize_schema.up.sql | 8 +- metadb/migrations/bindata.go | 12 +- metadb/server/http.go | 2 +- metadb/server/service.go | 120 +++++---- metadb/server/service_test.go | 20 +- repository.go | 4 +- repository_restic.go | 106 ++++---- repository_restic_test.go | 147 +++++++++-- shell.go | 66 ----- source.go | 182 +++++++++----- testdata/sources/source.yml | 5 + types.go | 79 +++--- 23 files changed, 897 insertions(+), 720 deletions(-) diff --git a/agent.go b/agent.go index 533b21e..21e2ba0 100644 --- a/agent.go +++ b/agent.go @@ -63,7 +63,7 @@ func (a *Agent) Close() { // Create a new jobs.Schedule that will trigger a separate backup for // each configured data source that includes a 'schedule' attribute. -func makeSchedule(ctx context.Context, m Manager, sourceSpecs []SourceSpec, hostSeed int64) (*jobs.Schedule, error) { +func makeSchedule(ctx context.Context, m Manager, sourceSpecs []*SourceSpec, hostSeed int64) (*jobs.Schedule, error) { sched := jobs.NewSchedule(ctx, hostSeed) merr := new(util.MultiError) var good int @@ -72,9 +72,9 @@ func makeSchedule(ctx context.Context, m Manager, sourceSpecs []SourceSpec, host continue } // Bind spec to a new closure. 
- err := func(spec SourceSpec) error { + err := func(spec *SourceSpec) error { return sched.Add(spec.Name, spec.Schedule, func() jobs.Job { - _, j, err := m.BackupJob(ctx, []SourceSpec{spec}) + _, j, err := m.BackupJob(ctx, spec) if err != nil { log.Printf("%s: can't create backup job: %v", spec.Name, err) } diff --git a/agent_test.go b/agent_test.go index 2ef2c16..4daca34 100644 --- a/agent_test.go +++ b/agent_test.go @@ -9,19 +9,19 @@ import ( type fakeManager struct{} -func (m *fakeManager) BackupJob(context.Context, []SourceSpec) (Backup, jobs.Job, error) { - return Backup{}, nil, nil +func (m *fakeManager) BackupJob(context.Context, *SourceSpec) (*Backup, jobs.Job, error) { + return &Backup{}, nil, nil } -func (m *fakeManager) Backup(context.Context, []SourceSpec) (Backup, error) { - return Backup{}, nil +func (m *fakeManager) Backup(context.Context, *SourceSpec) (*Backup, error) { + return &Backup{}, nil } -func (m *fakeManager) RestoreJob(context.Context, FindRequest, string) (jobs.Job, error) { +func (m *fakeManager) RestoreJob(context.Context, *FindRequest, string) (jobs.Job, error) { return nil, nil } -func (m *fakeManager) Restore(context.Context, FindRequest, string) error { +func (m *fakeManager) Restore(context.Context, *FindRequest, string) error { return nil } @@ -34,27 +34,32 @@ func (m *fakeManager) GetStatus() ([]jobs.Status, []jobs.Status, []jobs.Status) } func TestMakeSchedule(t *testing.T) { - sourceSpecs := []SourceSpec{ - { + sourceSpecs := []*SourceSpec{ + &SourceSpec{ Name: "source1", Handler: "file1", Schedule: "@random_every 1d", - Atoms: []Atom{ - { - Name: "user1", - RelativePath: "user1", - }, - { - Name: "user2", - RelativePath: "user2", + Datasets: []*DatasetSpec{ + &DatasetSpec{ + Name: "users", + Atoms: []Atom{ + { + Name: "user1", + Path: "user1", + }, + { + Name: "user2", + Path: "user2", + }, + }, }, }, }, - { - Name: "source2", - Handler: "dbpipe", - Schedule: "35 3 * * *", - AtomsCommand: "echo user1 user1 ; echo user2 user2", + &SourceSpec{ + Name: "source2", + Handler: "dbpipe", + Schedule: "35 3 * * *", + DatasetsCommand: "echo user1 user1 ; echo user2 user2", }, } diff --git a/cmd/tabacco/restore.go b/cmd/tabacco/restore.go index 88cdc65..efa5b5e 100644 --- a/cmd/tabacco/restore.go +++ b/cmd/tabacco/restore.go @@ -77,8 +77,8 @@ func (c *restoreCommand) buildRestoreJob(ctx context.Context, mgr tabacco.Manage return jobs.AsyncGroup(restoreJobs), nil } -func (c *restoreCommand) newFindRequest(s string) tabacco.FindRequest { - return tabacco.FindRequest{ +func (c *restoreCommand) newFindRequest(s string) *tabacco.FindRequest { + return &tabacco.FindRequest{ Pattern: s, } } diff --git a/config.go b/config.go index a31c475..0a0c8aa 100644 --- a/config.go +++ b/config.go @@ -32,12 +32,24 @@ type Config struct { RandomSeedFile string `yaml:"random_seed_file"` MetadataStoreBackend *clientutil.BackendConfig `yaml:"metadb"` - HandlerSpecs []HandlerSpec - SourceSpecs []SourceSpec + HandlerSpecs []*HandlerSpec + SourceSpecs []*SourceSpec } +// RuntimeContext provides access to runtime objects whose lifetime is +// ultimately tied to the configuration. +type RuntimeContext interface { + Shell() *Shell + Close() +} + +// The set of objects that are created from a Config. Can change, so +// its access is controlled by the ConfigManager. However it stays +// fixed during a running backup. +// +// This is an implementation of RuntimeContext. 
type runtimeAssets struct { - handlerMap map[string]Handler + handlerMap map[string]*HandlerSpec repo Repository seed int64 shell *Shell @@ -47,18 +59,16 @@ func (a *runtimeAssets) Close() { a.repo.Close() // nolint } -func buildHandlerMap(specs []HandlerSpec, shell *Shell) (map[string]Handler, error) { - m := make(map[string]Handler) - merr := new(util.MultiError) - for _, spec := range specs { - h, err := spec.Parse(shell) - if err != nil { - merr.Add(err) - continue - } - m[spec.Name] = h +func (a *runtimeAssets) Shell() *Shell { + return a.shell +} + +func buildHandlerMap(specs []*HandlerSpec) map[string]*HandlerSpec { + m := make(map[string]*HandlerSpec) + for _, h := range specs { + m[h.Name] = h } - return m, merr.OrNil() + return m } func (c *Config) parse() (*runtimeAssets, error) { @@ -68,7 +78,7 @@ func (c *Config) parse() (*runtimeAssets, error) { // Parse the repository config. An error here is fatal, as we // don't have a way to operate without a repository. - repo, err := c.Repository.Parse(shell) + repo, err := c.Repository.Parse() if err != nil { return nil, err } @@ -76,16 +86,15 @@ func (c *Config) parse() (*runtimeAssets, error) { merr := new(util.MultiError) // Build the handlers. - handlerMap, err := buildHandlerMap(c.HandlerSpecs, shell) - if err != nil { - merr.Add(err) - } + handlerMap := buildHandlerMap(c.HandlerSpecs) // Validate the sources (Parse is called later at runtime). - var srcs []SourceSpec + // Sources that fail the check are removed from the + // SourceSpecs array. + var srcs []*SourceSpec for _, spec := range c.SourceSpecs { if err := spec.Check(handlerMap); err != nil { - merr.Add(err) + merr.Add(fmt.Errorf("source %s: %v", spec.Name, err)) continue } srcs = append(srcs, spec) @@ -110,17 +119,17 @@ func (c *Config) parse() (*runtimeAssets, error) { // The following functions read YAML files from .d-style directories. To be nice // to the user, each file can contain either a single object or a list of // multiple objects. -func readHandlersFromDir(dir string) ([]HandlerSpec, error) { - var out []HandlerSpec +func readHandlersFromDir(dir string) ([]*HandlerSpec, error) { + var out []*HandlerSpec err := foreachYAMLFile(dir, func(path string) error { - var specs []HandlerSpec + var specs []*HandlerSpec log.Printf("reading handler: %s", path) if err := readYAMLFile(path, &specs); err != nil { var spec HandlerSpec if err := readYAMLFile(path, &spec); err != nil { return err } - specs = []HandlerSpec{spec} + specs = append(specs, &spec) } out = append(out, specs...) return nil @@ -128,17 +137,17 @@ func readHandlersFromDir(dir string) ([]HandlerSpec, error) { return out, err } -func readSourcesFromDir(dir string) ([]SourceSpec, error) { - var out []SourceSpec +func readSourcesFromDir(dir string) ([]*SourceSpec, error) { + var out []*SourceSpec err := foreachYAMLFile(dir, func(path string) error { - var specs []SourceSpec + var specs []*SourceSpec log.Printf("reading source: %s", path) if err := readYAMLFile(path, &specs); err != nil { var spec SourceSpec if err := readYAMLFile(path, &spec); err != nil { return err } - specs = []SourceSpec{spec} + specs = append(specs, &spec) } out = append(out, specs...) 
return nil @@ -215,6 +224,7 @@ func foreachYAMLFile(dir string, f func(string) error) error { merr := new(util.MultiError) for _, path := range files { if err := f(path); err != nil { + log.Printf("error loading yaml file %s: %v", path, err) merr.Add(err) } } @@ -263,6 +273,8 @@ func (m *ConfigManager) Reload(config *Config) error { assets, err := config.parse() if assets == nil { return err + } else if err != nil { + log.Printf("warning: errors in configuration: %v", err) } // Update config and notify listeners (in a separate @@ -303,7 +315,12 @@ func (m *ConfigManager) Notify() <-chan struct{} { return ch } -func (m *ConfigManager) getHandler(name string) (Handler, bool) { +// Captures current runtime assets into a RuntimeContext +func (m *ConfigManager) newRuntimeContext() RuntimeContext { + return m.assets +} + +func (m *ConfigManager) getHandlerSpec(name string) (*HandlerSpec, bool) { m.mx.Lock() defer m.mx.Unlock() h, ok := m.assets.handlerMap[name] @@ -322,22 +339,27 @@ func (m *ConfigManager) getQueueSpec() jobs.QueueSpec { return m.config.Queue } -func (m *ConfigManager) getSourceSpecs() []SourceSpec { +func (m *ConfigManager) getSourceSpecs() []*SourceSpec { m.mx.Lock() defer m.mx.Unlock() return m.config.SourceSpecs } -func (m *ConfigManager) getSeed() int64 { +func (m *ConfigManager) findSource(name string) *SourceSpec { m.mx.Lock() defer m.mx.Unlock() - return m.assets.seed + for _, src := range m.config.SourceSpecs { + if src.Name == name { + return src + } + } + return nil } -func (m *ConfigManager) getShell() *Shell { +func (m *ConfigManager) getSeed() int64 { m.mx.Lock() defer m.mx.Unlock() - return m.assets.shell + return m.assets.seed } func (m *ConfigManager) getWorkDir() string { diff --git a/config_test.go b/config_test.go index dcf00e5..5bf2e4b 100644 --- a/config_test.go +++ b/config_test.go @@ -4,9 +4,11 @@ import ( "context" "errors" "fmt" + "log" "os" "strings" "testing" + "time" ) func TestReadConfig(t *testing.T) { @@ -27,6 +29,7 @@ func TestConfigManager(t *testing.T) { if err != nil { t.Fatal("ReadConfig()", err) } + log.Printf("loaded %d sources", len(conf.SourceSpecs)) mgr, err := NewConfigManager(conf) if err != nil { t.Fatal("NewConfigManager()", err) @@ -40,7 +43,12 @@ func TestConfigManager(t *testing.T) { // Test the Notify() mechanism by checking that it triggers // right away when setting up a new listener. - <-mgr.Notify() + tmr := time.NewTimer(1 * time.Second) + select { + case <-mgr.Notify(): + case <-tmr.C: + t.Fatal("Notify() channel did not trigger") + } } func TestRandomSeed(t *testing.T) { @@ -61,7 +69,7 @@ func TestConfig_Parse(t *testing.T) { type testdata struct { config *Config expectedOK bool - checkFn func(*runtimeAssets, []Dataset) error + checkFn func(*runtimeAssets, []*Dataset) error } tdd := []testdata{ // The following tests cover a few ways to generate @@ -69,24 +77,29 @@ func TestConfig_Parse(t *testing.T) { // the README. 
{ &Config{ - SourceSpecs: []SourceSpec{ - { - Name: "users/account1", - Handler: "file", - Atoms: []Atom{ - {RelativePath: "/data/account1"}, - }, - }, - { - Name: "users/account2", - Handler: "file", - Atoms: []Atom{ - {RelativePath: "/data/account2"}, + SourceSpecs: []*SourceSpec{ + &SourceSpec{ + Name: "users", + Handler: "file", + Schedule: "@random_every 24h", + Datasets: []*DatasetSpec{ + &DatasetSpec{ + Name: "account1", + Atoms: []Atom{ + {Name: "account1"}, + }, + }, + { + Name: "account2", + Atoms: []Atom{ + {Name: "account2"}, + }, + }, }, }, }, - HandlerSpecs: []HandlerSpec{ - { + HandlerSpecs: []*HandlerSpec{ + &HandlerSpec{ Name: "file", Type: "file", Params: map[string]interface{}{"path": "/"}, @@ -98,18 +111,24 @@ func TestConfig_Parse(t *testing.T) { }, { &Config{ - SourceSpecs: []SourceSpec{ - { - Name: "users", - Handler: "file", - Atoms: []Atom{ - {Name: "account1"}, - {Name: "account2"}, + SourceSpecs: []*SourceSpec{ + &SourceSpec{ + Name: "users", + Handler: "file", + Schedule: "@random_every 24h", + Datasets: []*DatasetSpec{ + &DatasetSpec{ + Name: "users", + Atoms: []Atom{ + {Name: "account1"}, + {Name: "account2"}, + }, + }, }, }, }, - HandlerSpecs: []HandlerSpec{ - { + HandlerSpecs: []*HandlerSpec{ + &HandlerSpec{ Name: "file", Type: "file", Params: map[string]interface{}{"path": "/data"}, @@ -121,15 +140,16 @@ func TestConfig_Parse(t *testing.T) { }, { &Config{ - SourceSpecs: []SourceSpec{ - { - Name: "users", - Handler: "file", - AtomsCommand: "echo account1; echo account2", + SourceSpecs: []*SourceSpec{ + &SourceSpec{ + Name: "users", + Handler: "file", + Schedule: "@random_every 24h", + DatasetsCommand: "echo '[{name: account1, atoms: [{name: account1}, {name: account2}]}]'", }, }, - HandlerSpecs: []HandlerSpec{ - { + HandlerSpecs: []*HandlerSpec{ + &HandlerSpec{ Name: "file", Type: "file", Params: map[string]interface{}{"path": "/data"}, @@ -172,31 +192,27 @@ func TestConfig_Parse(t *testing.T) { } } -func parseAllSources(ra *runtimeAssets, specs []SourceSpec) ([]Dataset, error) { - var out []Dataset +func parseAllSources(ra *runtimeAssets, specs []*SourceSpec) ([]*Dataset, error) { + var out []*Dataset for _, spec := range specs { ds, err := spec.Parse(context.Background()) if err != nil { return nil, err } - dsb := ra.handlerMap[ds.Handler].DatasetsForBackup(ds) - out = append(out, dsb...) + out = append(out, ds...) 
} return out, nil } -func checkTwoUserAccountsAtoms(ra *runtimeAssets, datasets []Dataset) error { +func checkTwoUserAccountsAtoms(ra *runtimeAssets, datasets []*Dataset) error { var numAtoms int for _, ds := range datasets { if ds.Name == "" { return errors.New("empty dataset name") } - if ds.Handler != "file" { - return fmt.Errorf("expected handler 'file', got '%s'", ds.Handler) - } for _, atom := range ds.Atoms { - if !strings.HasPrefix(atom.SourcePath, "/data/") { - return fmt.Errorf("bad atom source path: %s", atom.SourcePath) + if !strings.HasPrefix(atom.Name, "users/") { + return fmt.Errorf("bad atom name: %s", atom.Name) } numAtoms++ } diff --git a/handler_file.go b/handler_file.go index c82abb3..3fa6d54 100644 --- a/handler_file.go +++ b/handler_file.go @@ -10,48 +10,60 @@ import ( type fileHandler struct { path string - spec HandlerSpec } -func newFileHandler(spec HandlerSpec, _ *Shell) (Handler, error) { - path, ok := spec.Params["path"].(string) - if !ok || path == "" { - return nil, errors.New("missing path") +func newFileHandler(name string, params Params) (Handler, error) { + path := params.Get("path") + if path == "" { + return nil, errors.New("path not set") } - return &fileHandler{path: path, spec: spec}, nil + return &fileHandler{path: path}, nil } -func (h *fileHandler) Spec() HandlerSpec { - return h.spec -} - -func (h *fileHandler) DatasetsForBackup(ds Dataset) []Dataset { - // Set SourcePath on all atoms. - var atoms []Atom - for _, atom := range ds.Atoms { - relPath := atom.RelativePath - if relPath == "" { - relPath = atom.Name +// Convert the atom to a path. +func atomPath(a Atom, root string) string { + // If the atom has a path, use that. + if a.Path != "" { + // If it's an absolute path, just use it. + if a.Path[0] == '/' { + return a.Path } - atom.SourcePath = filepath.Join(h.path, relPath) - atoms = append(atoms, atom) + // Otherwise join it with the root path. + return filepath.Join(root, a.Path) } - ds.Atoms = atoms - return []Dataset{ds} + // Join the name with the root path by default. + return filepath.Join(root, a.Name) } -func (h *fileHandler) BackupJob(repo Repository, backup Backup, ds Dataset) jobs.Job { +func (h *fileHandler) BackupJob(rctx RuntimeContext, repo Repository, backup *Backup, ds *Dataset) jobs.Job { + // Build the list of filesystem paths to pass to the + // Repository.Backup method. + var paths []string + for _, a := range ds.Atoms { + paths = append(paths, atomPath(a, h.path)) + } + cmd := repo.BackupCmd(backup, ds, paths) + + // Now pass those paths to the Backup method. return jobs.JobFunc(func(ctx context.Context) error { - return repo.Backup(ctx, backup, ds, h.path /* UNUSED */) + return rctx.Shell().Run(ctx, cmd) }) } -func (h *fileHandler) DatasetsForRestore(ds Dataset) []Dataset { - return []Dataset{ds} -} +func (h *fileHandler) RestoreJob(rctx RuntimeContext, repo Repository, backup *Backup, ds *Dataset, target string) jobs.Job { + // Build the list of filesystem paths to pass to the + // Repository.Backup method. + var paths []string + for _, a := range ds.Atoms { + paths = append(paths, atomPath(a, h.path)) + } -func (h *fileHandler) RestoreJob(repo Repository, backup Backup, ds Dataset, target string) jobs.Job { + // Call the repo Restore method. 
return jobs.JobFunc(func(ctx context.Context) error { - return repo.Restore(ctx, backup, ds, target) + cmd, err := repo.RestoreCmd(ctx, rctx, backup, ds, paths, target) + if err != nil { + return err + } + return rctx.Shell().Run(ctx, cmd) }) } diff --git a/handler_pipe.go b/handler_pipe.go index cb40e52..e1e3fde 100644 --- a/handler_pipe.go +++ b/handler_pipe.go @@ -4,9 +4,8 @@ import ( "context" "errors" "fmt" - "io" "os" - "path/filepath" + "strings" "git.autistici.org/ai3/tools/tabacco/jobs" ) @@ -16,119 +15,74 @@ import ( // distinguish multiple atoms inside it. type pipeHandler struct { backupCmd, restoreCmd string - shell *Shell - spec HandlerSpec } -func newPipeHandler(spec HandlerSpec, shell *Shell) (Handler, error) { - backupCmd, ok := spec.Params["backup_command"].(string) - if !ok || backupCmd == "" { - return nil, errors.New("missing backup_command") +func newPipeHandler(name string, params Params) (Handler, error) { + backupCmd := params.Get("backup_command") + if backupCmd == "" { + return nil, errors.New("backup_command not set") } - restoreCmd, ok := spec.Params["restore_command"].(string) - if !ok || restoreCmd == "" { - return nil, errors.New("missing restore_command") + + restoreCmd := params.Get("restore_command") + if restoreCmd == "" { + return nil, errors.New("restore_command not set") } + return &pipeHandler{ - spec: spec, backupCmd: backupCmd, restoreCmd: restoreCmd, - shell: shell, }, nil } -func (h *pipeHandler) Spec() HandlerSpec { - return h.spec -} - -func (h *pipeHandler) DatasetsForBackup(ds Dataset) []Dataset { - var dsl []Dataset - for _, atom := range ds.Atoms { - atom.SourcePath = filepath.Join(ds.Name, atom.Name) - dsl = append(dsl, makeSingleAtomDataset(ds, atom)) - } - return dsl -} - -func (h *pipeHandler) BackupJob(repo Repository, backup Backup, ds Dataset) jobs.Job { - if len(ds.Atoms) > 1 { - panic("more than 1 atom in pipe source") - } - +func (h *pipeHandler) BackupJob(rctx RuntimeContext, repo Repository, backup *Backup, ds *Dataset) jobs.Job { + cmd := fmt.Sprintf( + "(%s) | %s", + expandVars(h.backupCmd, backup, ds), + repo.BackupStreamCmd(backup, ds), + ) return jobs.JobFunc(func(ctx context.Context) error { - return h.backupAtom(ctx, repo, backup, ds, ds.Atoms[0]) + return rctx.Shell().Run(ctx, cmd) }) } -func (h *pipeHandler) backupAtom(ctx context.Context, repo Repository, backup Backup, ds Dataset, atom Atom) error { - return h.shell.RunStdoutPipe( - ctx, - expandVars(h.backupCmd, backup, ds, atom), - func(stdout io.Reader) error { - return repo.BackupStream(ctx, backup, singleAtomDataset(ds, atom), stdout) - }, - ) -} - -func (h *pipeHandler) DatasetsForRestore(ds Dataset) []Dataset { - var dsl []Dataset - for _, atom := range ds.Atoms { - dsl = append(dsl, makeSingleAtomDataset(ds, atom)) - } - return dsl -} - -func (h *pipeHandler) RestoreJob(repo Repository, backup Backup, ds Dataset, target string) jobs.Job { - var restoreJobs []jobs.Job - for _, atom := range ds.Atoms { - func(atom Atom) { - restoreJobs = append(restoreJobs, jobs.JobFunc(func(ctx context.Context) error { - return h.restoreAtom(ctx, repo, backup, ds, atom, target) - })) - }(atom) - } - return jobs.AsyncGroup(restoreJobs) -} - -func (h *pipeHandler) restoreAtom(ctx context.Context, repo Repository, backup Backup, ds Dataset, atom Atom, target string) error { - return h.shell.RunStdinPipe( - ctx, - expandVars(h.restoreCmd, backup, ds, atom), - func(stdin io.Writer) error { - return repo.RestoreStream(ctx, backup, singleAtomDataset(ds, atom), target, stdin) - }, - ) 
-} - -func singleAtomDataset(ds Dataset, atom Atom) Dataset { - return Dataset{ - Name: ds.Name, - Handler: ds.Handler, - Atoms: []Atom{atom}, - } +func (h *pipeHandler) RestoreJob(rctx RuntimeContext, repo Repository, backup *Backup, ds *Dataset, target string) jobs.Job { + return jobs.JobFunc(func(ctx context.Context) error { + restoreCmd, err := repo.RestoreStreamCmd(ctx, rctx, backup, ds, getWorkDir(ctx)) + if err != nil { + return err + } + cmd := fmt.Sprintf( + "%s | (%s)", + restoreCmd, + expandVars(h.restoreCmd, backup, ds), + ) + return rctx.Shell().Run(ctx, cmd) + }) } -func expandVars(s string, backup Backup, ds Dataset, atom Atom) string { +func expandVars(s string, backup *Backup, ds *Dataset) string { return os.Expand(s, func(key string) string { switch key { + case "$": + return key case "backup.id": return backup.ID case "ds.name": return ds.Name - case "atom.name": - return atom.Name - case "atom.path": - return filepath.Join(ds.Name, atom.Name) + case "atom.names": + names := make([]string, 0, len(ds.Atoms)) + for _, a := range ds.Atoms { + names = append(names, a.Name) + } + return strings.Join(names, " ") + case "atom.paths": + paths := make([]string, 0, len(ds.Atoms)) + for _, a := range ds.Atoms { + paths = append(paths, a.Path) + } + return strings.Join(paths, " ") default: return os.Getenv(key) } }) } - -func makeSingleAtomDataset(ds Dataset, atom Atom) Dataset { - return Dataset{ - Name: fmt.Sprintf("/%s.%s", ds.Name, atom.Name), - Handler: ds.Handler, - Atoms: []Atom{atom}, - } -} diff --git a/handlers.go b/handlers.go index c0039c6..afc6958 100644 --- a/handlers.go +++ b/handlers.go @@ -13,25 +13,30 @@ type HandlerSpec struct { // Handler type, one of the known types. Type string `yaml:"type"` - Params map[string]interface{} `yaml:"params"` - - PreBackupCommand string `yaml:"pre_backup_command"` - PostBackupCommand string `yaml:"post_backup_command"` - PreRestoreCommand string `yaml:"pre_restore_command"` - PostRestoreCommand string `yaml:"post_restore_command"` + Params Params `yaml:"params"` } // Parse a HandlerSpec and return a Handler instance. -func (spec *HandlerSpec) Parse(shell *Shell) (Handler, error) { +func (spec *HandlerSpec) Parse(src *SourceSpec) (Handler, error) { if spec.Name == "" { - return nil, errors.New("name is empty") + return nil, errors.New("name is not set") + } + + // Merge parameters from the handler spec and the source, with + // preference to the latter. + params := make(map[string]interface{}) + for k, v := range spec.Params { + params[k] = v + } + for k, v := range src.Params { + params[k] = v } switch spec.Type { case "file": - return newFileHandler(*spec, shell) + return newFileHandler(spec.Name, params) case "pipe": - return newPipeHandler(*spec, shell) + return newPipeHandler(spec.Name, params) default: return nil, fmt.Errorf("%s: unknown handler type '%s'", spec.Name, spec.Type) } diff --git a/manager.go b/manager.go index 7f97410..651cc27 100644 --- a/manager.go +++ b/manager.go @@ -2,6 +2,7 @@ package tabacco import ( "context" + "errors" "fmt" "log" "os" @@ -46,14 +47,16 @@ func NewManager(ctx context.Context, configMgr *ConfigManager, ms MetadataStore) // Close the Manager and free all associated resources (those owned by // this object). 
func (m *tabaccoManager) Close() error { + m.workdirManager.Close() + return nil } type metadataJob struct { jobs.Job ms MetadataStore - backup Backup - ds Dataset + backup *Backup + ds *Dataset } func (j *metadataJob) RunContext(ctx context.Context) error { @@ -66,7 +69,7 @@ func (j *metadataJob) RunContext(ctx context.Context) error { return err } -func (m *tabaccoManager) withMetadata(j jobs.Job, backup Backup, ds Dataset) jobs.Job { +func (m *tabaccoManager) withMetadata(j jobs.Job, backup *Backup, ds *Dataset) jobs.Job { return &metadataJob{ Job: j, ms: m.ms, @@ -78,28 +81,47 @@ func (m *tabaccoManager) withMetadata(j jobs.Job, backup Backup, ds Dataset) job // Prepare the repository for a new backup. This is a synchronous // operation: we need to wait for it to complete to avoid running the // backup tasks too soon. -func (m *tabaccoManager) prepareBackupJob(backup Backup) jobs.Job { +func (m *tabaccoManager) prepareBackupJob(rctx RuntimeContext, backup *Backup) jobs.Job { + repo := m.configMgr.getRepository() return jobs.JobFunc(func(ctx context.Context) error { - repo := m.configMgr.getRepository() - if err := repo.Init(ctx); err != nil { - log.Printf("repository init failed: %v", err) - return err - } - log.Printf("preparing backup %s", backup.ID) - return repo.Prepare(ctx, backup) + return repo.Init(ctx, rctx) + //log.Printf("preparing backup %s", backup.ID) + //return repo.Prepare(ctx, backup) }) } -func (m *tabaccoManager) backupDatasetJob(h Handler, backup Backup, ds Dataset) jobs.Job { +func (m *tabaccoManager) wrapWithCommands(rctx RuntimeContext, initJob, backupJob jobs.Job, pre, post string) jobs.Job { var out []jobs.Job + if initJob != nil { + out = append(out, initJob) + } + if pre != "" { + out = append(out, m.commandJob(rctx, pre)) + } + out = append(out, backupJob) + if post != "" { + out = append(out, m.commandJob(rctx, post)) + } - // Let Handlers modify the Dataset if necessary, or generate - // more than one. - dsl := h.DatasetsForBackup(ds) + if len(out) == 1 { + return out[0] + } + return jobs.SyncGroup(out) +} - // Run pre_backup_command. - if cmd := h.Spec().PreBackupCommand; cmd != "" { - out = append(out, m.datasetCommandJob(cmd, backup, ds)) +func (m *tabaccoManager) makeBackupJob(ctx context.Context, rctx RuntimeContext, backup *Backup, src *SourceSpec) (jobs.Job, error) { + // Compile the source and the associated Handler. + dsl, err := src.Parse(ctx) + if err != nil { + return nil, err + } + hspec, ok := m.configMgr.getHandlerSpec(src.Handler) + if !ok { + return nil, fmt.Errorf("unknown handler '%s'", src.Handler) + } + h, err := hspec.Parse(src) + if err != nil { + return nil, err } // The actual backup operation. Assemble all the backup jobs @@ -107,22 +129,22 @@ func (m *tabaccoManager) backupDatasetJob(h Handler, backup Backup, ds Dataset) // // TODO: get the timeout from the SourceSpec. var backupJobs []jobs.Job - for _, realDS := range dsl { + repo := m.configMgr.getRepository() + for _, ds := range dsl { backupJobs = append(backupJobs, m.withMetadata( - h.BackupJob(m.configMgr.getRepository(), backup, realDS), + h.BackupJob(rctx, repo, backup, ds), backup, - realDS, + ds, )) } - out = append(out, jobs.WithTimeout( - jobs.AsyncGroup(backupJobs), - 24*time.Hour, - )) - - // Run post_backup_command. 
- if cmd := h.Spec().PostBackupCommand; cmd != "" { - out = append(out, m.datasetCommandJob(cmd, backup, ds)) - } + + backupJob := m.wrapWithCommands( + rctx, + m.prepareBackupJob(rctx, backup), + jobs.WithTimeout(jobs.AsyncGroup(backupJobs), src.Timeout), + src.PreBackupCommand, + src.PostBackupCommand, + ) // Group the jobs (sequentially) if there's more than one of // them. Give the final job a status and a user-visible name, @@ -132,14 +154,14 @@ func (m *tabaccoManager) backupDatasetJob(h Handler, backup Backup, ds Dataset) // in the 'backup' queue for concurrency limiting. // // Oh, and here is where we add per-dataset instrumentation. - id := fmt.Sprintf("backup-dataset-%s", ds.Name) + id := fmt.Sprintf("backup-source-%s", src.Name) return m.WithQueue( m.WithStatus( m.WithExclusiveLock( m.withWorkDir( withInstrumentation( - jobs.SyncGroup(out), - ds.Name, + backupJob, + src.Name, ), ), id, @@ -147,51 +169,23 @@ func (m *tabaccoManager) backupDatasetJob(h Handler, backup Backup, ds Dataset) id, ), "backup", - ) + ), nil } -// BackupJob returns a single Job that backs up one or more sources to -// the configured destination repository. -func (m *tabaccoManager) BackupJob(ctx context.Context, sourceSpecs []SourceSpec) (Backup, jobs.Job, error) { - // Parse the source specs and obtain Datasets. Errors here are - // logged but *not* fatal, unless there are errors and the - // list of non-erroring sources is nil. - - backup := newBackup("") - prepJob := m.prepareBackupJob(backup) - var backupJobs []jobs.Job - - merr := new(util.MultiError) - for _, spec := range sourceSpecs { - h, ok := m.configMgr.getHandler(spec.Handler) - if !ok { - return Backup{}, nil, fmt.Errorf("inconsistency: no '%s' handler", spec.Handler) - } - - ds, err := spec.Parse(ctx) - if err != nil { - merr.Add(err) - continue - } +func (m *tabaccoManager) BackupJob(ctx context.Context, src *SourceSpec) (*Backup, jobs.Job, error) { + // Create a new Backup. + b := newBackup("") - // Create the backup job and add it to our list. - backupJobs = append(backupJobs, m.backupDatasetJob(h, backup, ds)) - } + // Create a RuntimeContext. + rctx := m.configMgr.newRuntimeContext() - // Run the job to initialize the repository before anything else. - j := m.WithStatus( - jobs.SyncGroup([]jobs.Job{ - prepJob, - jobs.AsyncGroup(backupJobs), - }), - fmt.Sprintf("backup-%s", backup.ID), - ) - return backup, j, nil + j, err := m.makeBackupJob(ctx, rctx, b, src) + return b, j, err } // Backup just runs the BackupJob synchronously. -func (m *tabaccoManager) Backup(ctx context.Context, sourceSpecs []SourceSpec) (Backup, error) { - backup, job, err := m.BackupJob(ctx, sourceSpecs) +func (m *tabaccoManager) Backup(ctx context.Context, src *SourceSpec) (*Backup, error) { + backup, job, err := m.BackupJob(ctx, src) if err != nil { return backup, err } @@ -199,29 +193,36 @@ func (m *tabaccoManager) Backup(ctx context.Context, sourceSpecs []SourceSpec) ( return backup, err } -func (m *tabaccoManager) restoreDatasetJob(h Handler, backup Backup, ds Dataset, target string) jobs.Job { - var out []jobs.Job - - dsl := h.DatasetsForRestore(ds) - - // Run pre_restore_command. - if cmd := h.Spec().PreRestoreCommand; cmd != "" { - out = append(out, m.datasetCommandJob(cmd, backup, ds)) +func (m *tabaccoManager) makeRestoreJob(rctx RuntimeContext, backup *Backup, src *SourceSpec, dsl []*Dataset, target string) (jobs.Job, error) { + // Just need the Handler. 
+ hspec, ok := m.configMgr.getHandlerSpec(src.Handler) + if !ok { + return nil, fmt.Errorf("unknown handler '%s'", src.Handler) + } + h, err := hspec.Parse(src) + if err != nil { + return nil, err } // The actual backup operation. Just a thin wrapper around // doBackupDataset() that binds together the context, backup, // ds and target via the closure. var restoreJobs []jobs.Job - for _, realDS := range dsl { - restoreJobs = append(restoreJobs, h.RestoreJob(m.configMgr.getRepository(), backup, realDS, target)) + repo := m.configMgr.getRepository() + for _, ds := range dsl { + restoreJobs = append( + restoreJobs, + h.RestoreJob(rctx, repo, backup, ds, target), + ) } - out = append(out, jobs.AsyncGroup(restoreJobs)) - // Run post_restore_command. - if cmd := h.Spec().PostRestoreCommand; cmd != "" { - out = append(out, m.datasetCommandJob(cmd, backup, ds)) - } + restoreJob := m.wrapWithCommands( + rctx, + nil, + jobs.AsyncGroup(restoreJobs), + src.PreRestoreCommand, + src.PostRestoreCommand, + ) // Group the jobs (sequentially) if there's more than one of // them. Give the final job a status and a user-visible name, @@ -229,45 +230,71 @@ func (m *tabaccoManager) restoreDatasetJob(h Handler, backup Backup, ds Dataset, // leave-running policy, so no more than one restore per // datasource can run at any given time. Finally, the job runs // in the 'restore' queue for concurrency limiting. - id := fmt.Sprintf("restore_%s", ds.Name) + id := fmt.Sprintf("restore-source-%s", src.Name) return m.WithQueue( m.WithStatus( - m.WithExclusiveLock(jobs.SyncGroup(out), id, false), + m.WithExclusiveLock( + restoreJob, + id, + false), id, ), "restore", - ) + ), nil +} + +func groupDatasetsBySource(dsl []*Dataset) map[string][]*Dataset { + m := make(map[string][]*Dataset) + for _, ds := range dsl { + m[ds.Source] = append(m[ds.Source], ds) + } + return m } // RestoreJob creates a job that restores the results of the // FindRequest (with NumVersions=1) onto the given target directory. -func (m *tabaccoManager) RestoreJob(ctx context.Context, req FindRequest, target string) (jobs.Job, error) { - // Find the atoms relevant to this restore. +func (m *tabaccoManager) RestoreJob(ctx context.Context, req *FindRequest, target string) (jobs.Job, error) { + // Find the atoms relevant to this restore. The results will + // be grouped in Backups and Datasets that only include the + // relevant Atoms. req.NumVersions = 1 - versions, err := m.ms.FindAtoms(ctx, req) + backups, err := m.ms.FindAtoms(ctx, req) if err != nil { return nil, err } + if len(backups) == 0 { + return nil, errors.New("no results found for query") + } + + // Create a RuntimeContext. + rctx := m.configMgr.newRuntimeContext() var restoreJobs []jobs.Job - for _, vv := range versions { - ds := vv[0].Dataset - backup := vv[0].Backup - - h, ok := m.configMgr.getHandler(ds.Handler) - if !ok { - log.Printf("%s: unknown handler '%s'", ds.Name, ds.Handler) - continue + merr := new(util.MultiError) + for _, b := range backups { + // Group the datasets by source, find the source and create the restore jobs. 
+ for srcName, dsl := range groupDatasetsBySource(b.Datasets) { + + src := m.configMgr.findSource(srcName) + if src == nil { + merr.Add(fmt.Errorf("unknown source '%s'", srcName)) + continue + } + + j, err := m.makeRestoreJob(rctx, b, src, dsl, target) + if err != nil { + merr.Add(fmt.Errorf("source %s: %v", srcName, err)) + continue + } + restoreJobs = append(restoreJobs, j) } - - restoreJobs = append(restoreJobs, m.restoreDatasetJob(h, backup, ds, target)) } - return m.WithStatus(jobs.AsyncGroup(restoreJobs), fmt.Sprintf("restore_%s", util.RandomID())), nil + return m.WithStatus(jobs.AsyncGroup(restoreJobs), fmt.Sprintf("restore_%s", util.RandomID())), merr.OrNil() } // Restore just runs the RestoreJob synchronously. -func (m *tabaccoManager) Restore(ctx context.Context, req FindRequest, target string) error { +func (m *tabaccoManager) Restore(ctx context.Context, req *FindRequest, target string) error { job, err := m.RestoreJob(ctx, req, target) if err != nil { return err @@ -277,23 +304,19 @@ func (m *tabaccoManager) Restore(ctx context.Context, req FindRequest, target st // Create a new Backup object with its own unique ID (which actually // consists of 16 random bytes, hex-encoded). -func newBackup(host string) Backup { +func newBackup(host string) *Backup { if host == "" { host, _ = os.Hostname() // nolint } - return Backup{ + return &Backup{ ID: util.RandomID(), Host: host, Timestamp: time.Now(), } } -func (m *tabaccoManager) datasetCommandJob(cmd string, backup Backup, ds Dataset) jobs.Job { - env := map[string]string{ - "BACKUP_ID": backup.ID, - "DATASET_NAME": ds.Name, - } +func (m *tabaccoManager) commandJob(rctx RuntimeContext, cmd string) jobs.Job { return jobs.JobFunc(func(ctx context.Context) error { - return m.configMgr.getShell().RunWithEnv(ctx, cmd, env) + return rctx.Shell().Run(ctx, cmd) }) } diff --git a/manager_test.go b/manager_test.go index 7593fb1..6f68b57 100644 --- a/manager_test.go +++ b/manager_test.go @@ -2,7 +2,7 @@ package tabacco import ( "context" - "fmt" + "log" "testing" "time" @@ -15,11 +15,11 @@ type dummyMetadataEntry struct { name string dsName string host string - handler string + source string atom Atom } -func (e dummyMetadataEntry) match(req FindRequest) bool { +func (e dummyMetadataEntry) match(req *FindRequest) bool { if req.Pattern != "" && !req.matchPattern(e.name) { return false } @@ -29,15 +29,15 @@ func (e dummyMetadataEntry) match(req FindRequest) bool { return true } -func (e dummyMetadataEntry) toDataset() Dataset { - return Dataset{ - Name: e.dsName, - Handler: e.handler, +func (e dummyMetadataEntry) toDataset() *Dataset { + return &Dataset{ + Name: e.dsName, + Source: e.source, } } -func (e dummyMetadataEntry) toBackup() Backup { - return Backup{ +func (e dummyMetadataEntry) toBackup() *Backup { + return &Backup{ ID: e.backupID, Timestamp: e.backupTS, Host: e.host, @@ -48,7 +48,7 @@ type dummyMetadataStore struct { log []dummyMetadataEntry } -func (d *dummyMetadataStore) FindAtoms(_ context.Context, req FindRequest) ([][]Version, error) { +func (d *dummyMetadataStore) FindAtoms(_ context.Context, req *FindRequest) ([]*Backup, error) { tmp := make(map[string]map[string][]dummyMetadataEntry) for _, l := range d.log { if !l.match(req) { @@ -68,37 +68,47 @@ func (d *dummyMetadataStore) FindAtoms(_ context.Context, req FindRequest) ([][] count = 1 } - var out [][]Version + // Accumulate output into Backups + btmp := make(map[string]*Backup) for _, dsmap := range tmp { - var dsVersions []Version for _, dslog := range dsmap { - v := 
Version{ - Dataset: dslog[0].toDataset(), - Backup: dslog[0].toBackup(), + ds := dslog[0].toDataset() + b := dslog[0].toBackup() + bb, ok := btmp[b.ID] + if !ok { + btmp[b.ID] = b + bb = b } + bb.Datasets = append(bb.Datasets, ds) + + ds.Atoms = nil if len(dslog) > count { dslog = dslog[len(dslog)-count:] } for _, l := range dslog { - v.Dataset.Atoms = append(v.Dataset.Atoms, l.atom) + ds.Atoms = append(ds.Atoms, l.atom) } - dsVersions = append(dsVersions, v) } - out = append(out, dsVersions) + } + var out []*Backup + for _, b := range btmp { + out = append(out, b) } return out, nil } -func (d *dummyMetadataStore) AddDataset(_ context.Context, backup Backup, ds Dataset) error { +func (d *dummyMetadataStore) AddDataset(_ context.Context, backup *Backup, ds *Dataset) error { + log.Printf("AddDataset: %+v", *ds) for _, atom := range ds.Atoms { - name := fmt.Sprintf("%s/%s", ds.Name, atom.Name) + //name := fmt.Sprintf("%s/%s", ds.Name, atom.Name) + name := atom.Name d.log = append(d.log, dummyMetadataEntry{ backupID: backup.ID, backupTS: backup.Timestamp, host: backup.Host, name: name, dsName: ds.Name, - handler: ds.Handler, + source: ds.Source, atom: atom, }) } @@ -115,43 +125,45 @@ func TestManager_Backup(t *testing.T) { "password": "testpass", }, } - handlerSpecs := []HandlerSpec{ - { + handlerSpecs := []*HandlerSpec{ + &HandlerSpec{ Name: "file1", Type: "file", Params: map[string]interface{}{ "path": "/source/of/file1", }, - PreBackupCommand: "echo hello", + //PreBackupCommand: "echo hello", }, - { + &HandlerSpec{ Name: "dbpipe", Type: "pipe", Params: map[string]interface{}{ - "backup_command": "echo ${backup.id} ${ds.name} ${atom.name}", + "backup_command": "echo ${backup.id} ${ds.name} ${atom.names}", "restore_command": "cat", }, }, } - sourceSpecs := []SourceSpec{ - { - Name: "source1", - Handler: "file1", - Atoms: []Atom{ - { - Name: "user1", - RelativePath: "user1", + sourceSpecs := []*SourceSpec{ + &SourceSpec{ + Name: "source1", + Handler: "file1", + Schedule: "@random_every 1h", + Datasets: []*DatasetSpec{ + &DatasetSpec{ + Name: "user1", + Atoms: []Atom{{Name: "user1"}}, }, - { - Name: "user2", - RelativePath: "user2", + &DatasetSpec{ + Name: "user2", + Atoms: []Atom{{Name: "user2"}}, }, }, }, - { - Name: "source2", - Handler: "dbpipe", - AtomsCommand: "echo user1 user1 ; echo user2 user2", + &SourceSpec{ + Name: "source2", + Handler: "dbpipe", + Schedule: "@random_every 1h", + DatasetsCommand: "echo '[{name: users, atoms: [{name: user1}, {name: user2}]}]'", }, } queueSpec := jobs.QueueSpec{ @@ -177,30 +189,35 @@ func TestManager_Backup(t *testing.T) { } defer m.Close() - backup, err := m.Backup(context.TODO(), configMgr.getSourceSpecs()) - if err != nil { - t.Fatal(err) - } - if backup.ID == "" || backup.Host == "" { - t.Fatalf("empty fields in backup: %+v", backup) + for _, src := range configMgr.getSourceSpecs() { + backup, err := m.Backup(context.TODO(), src) + if err != nil { + t.Fatal(err) + } + if backup.ID == "" || backup.Host == "" { + t.Fatalf("empty fields in backup: %+v", backup) + } } // Try to find atoms in the metadata store. // Let's try with a pattern first. 
- resp, err := store.FindAtoms(context.TODO(), FindRequest{Pattern: "source1/*", NumVersions: 1}) + resp, err := store.FindAtoms(context.TODO(), &FindRequest{Pattern: "source1/*", NumVersions: 1}) if err != nil { t.Fatal("FindAtoms", err) } - if len(resp) != 2 { - t.Fatalf("bad response: %+v", resp) + if len(resp) != 1 { + t.Fatalf("bad FindAtoms(source1/*) response: %+v", resp) + } + if l := len(resp[0].Datasets); l != 2 { + t.Fatalf("bad number of datasets returned by FindAtoms(source1/*): got %d, expected 2", l) } // A pattern matching a single atom. - resp, err = store.FindAtoms(context.TODO(), FindRequest{Pattern: "source1/user2"}) + resp, err = store.FindAtoms(context.TODO(), &FindRequest{Pattern: "source1/user2/user2"}) if err != nil { t.Fatal("FindAtoms", err) } if len(resp) != 1 { - t.Fatalf("bad response: %+v", resp) + t.Fatalf("bad FindAtoms(source1/user2/user2) response: %+v", resp) } } diff --git a/metadb/client/client.go b/metadb/client/client.go index ace6903..2fd61d5 100644 --- a/metadb/client/client.go +++ b/metadb/client/client.go @@ -11,6 +11,7 @@ type metadbClient struct { backend clientutil.Backend } +// New creates a new client for a remote MetadataStore. func New(config *clientutil.BackendConfig) (tabacco.MetadataStore, error) { be, err := clientutil.NewBackend(config) if err != nil { @@ -20,11 +21,16 @@ func New(config *clientutil.BackendConfig) (tabacco.MetadataStore, error) { } type addDatasetRequest struct { - Backup tabacco.Backup `json:"backup"` - Dataset tabacco.Dataset `json:"dataset"` + Backup *tabacco.Backup `json:"backup"` + Dataset *tabacco.Dataset `json:"dataset"` } -func (c *metadbClient) AddDataset(ctx context.Context, backup tabacco.Backup, ds tabacco.Dataset) error { +func (c *metadbClient) AddDataset(ctx context.Context, backup *tabacco.Backup, ds *tabacco.Dataset) error { + // Ensure that the backup has no Datasets + if len(backup.Datasets) > 0 { + panic("AddDataset client called with non-empty backup.Datasets") + } + req := addDatasetRequest{ Backup: backup, Dataset: ds, @@ -32,8 +38,8 @@ func (c *metadbClient) AddDataset(ctx context.Context, backup tabacco.Backup, ds return c.backend.Call(ctx, "", "/api/add_dataset", &req, nil) } -func (c *metadbClient) FindAtoms(ctx context.Context, req tabacco.FindRequest) ([][]tabacco.Version, error) { - var resp [][]tabacco.Version - err := c.backend.Call(ctx, "", "/api/find_atoms", &req, &resp) +func (c *metadbClient) FindAtoms(ctx context.Context, req *tabacco.FindRequest) ([]*tabacco.Backup, error) { + var resp []*tabacco.Backup + err := c.backend.Call(ctx, "", "/api/find_atoms", req, &resp) return resp, err } diff --git a/metadb/migrations/1_initialize_schema.up.sql b/metadb/migrations/1_initialize_schema.up.sql index fbf4d6e..97df028 100644 --- a/metadb/migrations/1_initialize_schema.up.sql +++ b/metadb/migrations/1_initialize_schema.up.sql @@ -7,13 +7,11 @@ CREATE TABLE log ( backup_timestamp DATETIME, backup_host VARCHAR(128), dataset_name VARCHAR(128), - dataset_handler VARCHAR(128), + dataset_source VARCHAR(128), atom_name VARCHAR(255), - atom_path VARCHAR(255), - atom_source_path TEXT, - atom_relative_path TEXT + atom_path VARCHAR(255) ); -CREATE UNIQUE INDEX idx_log_primary ON log (backup_id, dataset_name, atom_path); +CREATE UNIQUE INDEX idx_log_primary ON log (backup_id, dataset_name, atom_name); CREATE INDEX idx_log_backup_id ON log (backup_id); CREATE INDEX idx_log_backup_id_and_dataset_name ON log (backup_id, dataset_name); diff --git a/metadb/migrations/bindata.go 
b/metadb/migrations/bindata.go index dde27ea..320234c 100644 --- a/metadb/migrations/bindata.go +++ b/metadb/migrations/bindata.go @@ -63,7 +63,7 @@ func _1_initialize_schemaDownSql() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "1_initialize_schema.down.sql", size: 123, mode: os.FileMode(436), modTime: time.Unix(1532974389, 0)} + info := bindataFileInfo{name: "1_initialize_schema.down.sql", size: 123, mode: os.FileMode(420), modTime: time.Unix(1535012987, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -77,14 +77,12 @@ CREATE TABLE log ( backup_timestamp DATETIME, backup_host VARCHAR(128), dataset_name VARCHAR(128), - dataset_handler VARCHAR(128), + dataset_source VARCHAR(128), atom_name VARCHAR(255), - atom_path VARCHAR(255), - atom_source_path TEXT, - atom_relative_path TEXT + atom_path VARCHAR(255) ); -CREATE UNIQUE INDEX idx_log_primary ON log (backup_id, dataset_name, atom_path); +CREATE UNIQUE INDEX idx_log_primary ON log (backup_id, dataset_name, atom_name); CREATE INDEX idx_log_backup_id ON log (backup_id); CREATE INDEX idx_log_backup_id_and_dataset_name ON log (backup_id, dataset_name); `) @@ -99,7 +97,7 @@ func _1_initialize_schemaUpSql() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "1_initialize_schema.up.sql", size: 604, mode: os.FileMode(436), modTime: time.Unix(1532985344, 0)} + info := bindataFileInfo{name: "1_initialize_schema.up.sql", size: 539, mode: os.FileMode(420), modTime: time.Unix(1560765826, 0)} a := &asset{bytes: bytes, info: info} return a, nil } diff --git a/metadb/server/http.go b/metadb/server/http.go index 01e2c7c..beae5e9 100644 --- a/metadb/server/http.go +++ b/metadb/server/http.go @@ -38,7 +38,7 @@ func (s *httpServer) handleFindAtoms(w http.ResponseWriter, r *http.Request) { return } - resp, err := s.FindAtoms(r.Context(), req) + resp, err := s.FindAtoms(r.Context(), &req) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) log.Printf("FindAtoms(%+v) error: %v", req, err) diff --git a/metadb/server/service.go b/metadb/server/service.go index 11a6fad..acbd0ab 100644 --- a/metadb/server/service.go +++ b/metadb/server/service.go @@ -5,7 +5,6 @@ import ( "database/sql" "fmt" "log" - "path/filepath" "strings" "time" @@ -14,15 +13,13 @@ import ( // An atom, as represented in the database, denormalized. 
type dbAtom struct { - BackupID string - BackupTimestamp time.Time - BackupHost string - DatasetName string - DatasetHandler string - AtomName string - AtomPath string - AtomSourcePath string - AtomRelativePath string + BackupID string + BackupTimestamp time.Time + BackupHost string + DatasetName string + DatasetSource string + AtomName string + AtomPath string } func makeAtoms(backup tabacco.Backup, ds tabacco.Dataset) []dbAtom { @@ -33,9 +30,9 @@ func makeAtoms(backup tabacco.Backup, ds tabacco.Dataset) []dbAtom { BackupTimestamp: backup.Timestamp, BackupHost: backup.Host, DatasetName: ds.Name, - DatasetHandler: ds.Handler, + DatasetSource: ds.Source, AtomName: atom.Name, - AtomPath: filepath.Join(ds.Name, atom.Name), + AtomPath: atom.Path, }) } return out @@ -51,73 +48,76 @@ func (a *dbAtom) getBackup() *tabacco.Backup { func (a *dbAtom) getDataset() *tabacco.Dataset { return &tabacco.Dataset{ - Name: a.DatasetName, - Handler: a.DatasetHandler, + Name: a.DatasetName, + Source: a.DatasetSource, } } func (a *dbAtom) getAtom() tabacco.Atom { return tabacco.Atom{ - Name: a.AtomName, - RelativePath: a.AtomRelativePath, - SourcePath: a.AtomSourcePath, + Name: a.AtomName, + Path: a.AtomPath, } } -func normalizeAtoms(dbAtoms []dbAtom, numVersions int) [][]tabacco.Version { +func keepNumVersions(dbAtoms []*dbAtom, numVersions int) []*dbAtom { // numVersions == 0 is remapped to 1. if numVersions < 1 { numVersions = 1 } - // Accumulate versions keyed by backup ID first, dataset name - // next. Preserve the ordering of backups in dbAtoms, which we - // are going to use later to apply a per-dataset limit. - backupMap := make(map[string]*tabacco.Backup) - dsMap := make(map[string]map[string]*tabacco.Dataset) - var backupsInOrder []string + count := 0 + tmp := make(map[string][]*dbAtom) + for _, a := range dbAtoms { + l := tmp[a.AtomName] + if len(l) < numVersions { + l = append(l, a) + count++ + } + tmp[a.AtomName] = l + } + out := make([]*dbAtom, 0, count) + for _, l := range tmp { + out = append(out, l...) + } + return out +} + +func groupByBackup(dbAtoms []*dbAtom) []*tabacco.Backup { + // As we scan through dbAtoms, aggregate into Backups and Datasets. + backups := make(map[string]*tabacco.Backup) + dsm := make(map[string]map[string]*tabacco.Dataset) for _, atom := range dbAtoms { // Create the Backup object if it does not exist. - if _, ok := backupMap[atom.BackupID]; !ok { - backupMap[atom.BackupID] = atom.getBackup() - backupsInOrder = append(backupsInOrder, atom.BackupID) + b, ok := backups[atom.BackupID] + if !ok { + b = atom.getBackup() + backups[atom.BackupID] = b } // Create the Dataset object for this Backup in the // two-level map (creating the intermediate map if // necessary). - tmp, ok := dsMap[atom.BackupID] + tmp, ok := dsm[atom.BackupID] if !ok { tmp = make(map[string]*tabacco.Dataset) - dsMap[atom.BackupID] = tmp + dsm[atom.BackupID] = tmp } ds, ok := tmp[atom.DatasetName] if !ok { ds = atom.getDataset() tmp[atom.DatasetName] = ds + b.Datasets = append(b.Datasets, ds) } // Finally, add the atom to the dataset. ds.Atoms = append(ds.Atoms, atom.getAtom()) } - // Now dump the maps to a Version array. 
- var out [][]tabacco.Version - dsCount := make(map[string]int) - for _, backupID := range backupsInOrder { - tmp := dsMap[backupID] - backup := backupMap[backupID] - var tmpv []tabacco.Version - for _, ds := range tmp { - if dsCount[ds.Name] < numVersions { - tmpv = append(tmpv, tabacco.Version{Backup: *backup, Dataset: *ds}) - dsCount[ds.Name]++ - } - } - if len(tmpv) > 0 { - out = append(out, tmpv) - } + out := make([]*tabacco.Backup, 0, len(backups)) + for _, b := range backups { + out = append(out, b) } return out } @@ -152,10 +152,10 @@ var statements = map[string]string{ "insert_atom": ` INSERT INTO log ( backup_id, backup_timestamp, backup_host, - dataset_name, dataset_handler, - atom_name, atom_path, atom_source_path, atom_relative_path + dataset_name, dataset_source, + atom_name, atom_path ) VALUES ( - ?, ?, ?, ?, ?, ?, ?, ?, ? + ?, ?, ?, ?, ?, ?, ? ) `, } @@ -175,11 +175,9 @@ func (s *Service) AddDataset(ctx context.Context, backup tabacco.Backup, ds taba dbAtom.BackupTimestamp, dbAtom.BackupHost, dbAtom.DatasetName, - dbAtom.DatasetHandler, + dbAtom.DatasetSource, dbAtom.AtomName, dbAtom.AtomPath, - dbAtom.AtomSourcePath, - dbAtom.AtomRelativePath, ); err != nil { return err } @@ -191,7 +189,7 @@ func (s *Service) AddDataset(ctx context.Context, backup tabacco.Backup, ds taba // FindAtoms searches for atoms meeting a particular criteria and // returns them grouped by backup and dataset (the atoms will be // contained within the dataset). -func (s *Service) FindAtoms(ctx context.Context, req tabacco.FindRequest) ([][]tabacco.Version, error) { +func (s *Service) FindAtoms(ctx context.Context, req *tabacco.FindRequest) ([]*tabacco.Backup, error) { tx, err := s.db.BeginTx(ctx, nil) if err != nil { return nil, err @@ -207,16 +205,16 @@ func (s *Service) FindAtoms(ctx context.Context, req tabacco.FindRequest) ([][]t args = append(args, req.Host) } if req.Pattern != "" { - where = append(where, "atom_path LIKE ?") + where = append(where, "atom_name LIKE ?") args = append(args, strings.Replace(req.Pattern, "*", "%", -1)) } // Build the final query and execute it. 
q := fmt.Sprintf( - `SELECT + `SELECT backup_id, backup_timestamp, backup_host, - dataset_name, dataset_handler, - atom_name, atom_path, atom_source_path, atom_relative_path + dataset_name, dataset_source, + atom_name, atom_path FROM log WHERE %s ORDER BY backup_timestamp DESC`, strings.Join(where, " AND "), @@ -227,22 +225,22 @@ func (s *Service) FindAtoms(ctx context.Context, req tabacco.FindRequest) ([][]t } defer rows.Close() // nolint - var atoms []dbAtom + var atoms []*dbAtom for rows.Next() { var a dbAtom if err := rows.Scan( &a.BackupID, &a.BackupTimestamp, &a.BackupHost, - &a.DatasetName, &a.DatasetHandler, - &a.AtomName, &a.AtomPath, &a.AtomSourcePath, &a.AtomRelativePath, + &a.DatasetName, &a.DatasetSource, + &a.AtomName, &a.AtomPath, ); err != nil { log.Printf("bad row: %v", err) continue } - atoms = append(atoms, a) + atoms = append(atoms, &a) } if err := rows.Err(); err != nil { return nil, err } - return normalizeAtoms(atoms, req.NumVersions), nil + return groupByBackup(keepNumVersions(atoms, req.NumVersions)), nil } diff --git a/metadb/server/service_test.go b/metadb/server/service_test.go index 2d3ddbd..69fb972 100644 --- a/metadb/server/service_test.go +++ b/metadb/server/service_test.go @@ -21,16 +21,16 @@ func addTestEntry(t *testing.T, svc *Service, backupID, host, dsName string) { Timestamp: time.Now(), }, tabacco.Dataset{ - Name: dsName, - Handler: "file", + Name: dsName, + Source: "file", Atoms: []tabacco.Atom{ { - Name: "sub1", - SourcePath: "/path/dataset1/sub1", + Name: dsName + "/sub1", + Path: "/path/dataset1/sub1", }, { - Name: "sub2", - SourcePath: "/path/dataset1/sub2", + Name: dsName + "/sub2", + Path: "/path/dataset1/sub2", }, }, }, @@ -48,7 +48,7 @@ func TestService_AddDataset(t *testing.T) { } defer svc.Close() - addTestEntry(t, svc, "1234", "host1", "dataset1") + addTestEntry(t, svc, "1234", "host1", "file/dataset1") } func TestService_FindAtoms(t *testing.T) { @@ -61,14 +61,14 @@ func TestService_FindAtoms(t *testing.T) { // Create 10 fake backups, which differ only in host. for i := 0; i < 10; i++ { - addTestEntry(t, svc, fmt.Sprintf("backup%06d", i), fmt.Sprintf("host%d", i), "dataset1") + addTestEntry(t, svc, fmt.Sprintf("backup%06d", i), fmt.Sprintf("host%d", i), "file/dataset1") } // Searching for a specific atom (common to all backups) // should return exactly 10 results. vv, err := svc.FindAtoms( context.Background(), - tabacco.FindRequest{ + &tabacco.FindRequest{ Pattern: "*/sub1", NumVersions: 10, }, @@ -84,7 +84,7 @@ func TestService_FindAtoms(t *testing.T) { // return a single result. vv, err = svc.FindAtoms( context.Background(), - tabacco.FindRequest{ + &tabacco.FindRequest{ Pattern: "*/sub1", Host: "host7", }, diff --git a/repository.go b/repository.go index cbf97c1..f88366f 100644 --- a/repository.go +++ b/repository.go @@ -13,14 +13,14 @@ type RepositorySpec struct { } // Parse a RepositorySpec and return a Repository instance. 
-func (spec *RepositorySpec) Parse(shell *Shell) (Repository, error) { +func (spec *RepositorySpec) Parse() (Repository, error) { if spec.Name == "" { return nil, errors.New("name is empty") } switch spec.Type { case "restic": - return newResticRepository(spec.Params, shell) + return newResticRepository(spec.Params) default: return nil, fmt.Errorf("unknown repository type '%s'", spec.Type) diff --git a/repository_restic.go b/repository_restic.go index 2570f5b..8f9e8e0 100644 --- a/repository_restic.go +++ b/repository_restic.go @@ -21,10 +21,11 @@ type resticRepository struct { bin string uri string passwordFile string - shell *Shell excludes []string excludeFiles []string autoPrune bool + + initialized bool } func (r *resticRepository) resticCmd() string { @@ -69,7 +70,7 @@ func checkResticVersion(bin string) error { } // newResticRepository returns a restic repository. -func newResticRepository(params map[string]interface{}, shell *Shell) (Repository, error) { +func newResticRepository(params map[string]interface{}) (Repository, error) { uri, ok := params["uri"].(string) if !ok || uri == "" { return nil, errors.New("missing uri") @@ -112,7 +113,6 @@ func newResticRepository(params map[string]interface{}, shell *Shell) (Repositor passwordFile: tmpf.Name(), excludes: ex, excludeFiles: exf, - shell: shell, autoPrune: autoPrune, }, nil } @@ -121,44 +121,46 @@ func (r *resticRepository) Close() error { return os.Remove(r.passwordFile) } -func (r *resticRepository) Init(ctx context.Context) error { +func (r *resticRepository) Init(ctx context.Context, rctx RuntimeContext) error { + if r.initialized { + return nil + } + // Restic init will fail the second time we run it, ignore // errors. - return r.shell.Run(ctx, fmt.Sprintf( + err := rctx.Shell().Run(ctx, fmt.Sprintf( "%s init --quiet || true", r.resticCmd(), )) + if err == nil { + r.initialized = true + } + return err } -func (r *resticRepository) Prepare(ctx context.Context, backup Backup) error { +func (r *resticRepository) Prepare(ctx context.Context, rctx RuntimeContext, backup *Backup) error { if !r.autoPrune { return nil } - return r.shell.Run(ctx, fmt.Sprintf( + return rctx.Shell().Run(ctx, fmt.Sprintf( "%s forget --host %s --keep-last 10 --prune", r.resticCmd(), backup.Host, )) } -func (r *resticRepository) Backup(ctx context.Context, backup Backup, ds Dataset, sourcePath string) error { - cmd := fmt.Sprintf( - "%s backup --cleanup-cache --exclude-caches --one-file-system --tag %s --tag backup_id=%s", +func (r *resticRepository) BackupCmd(backup *Backup, ds *Dataset, sourcePaths []string) string { + return fmt.Sprintf( + "%s backup --cleanup-cache --exclude-caches --one-file-system --tag %s --tag backup_id=%s %s", r.resticCmd(), ds.Name, backup.ID, + strings.Join(sourcePaths, " "), ) - for _, atom := range ds.Atoms { - if atom.SourcePath == "" { - return errors.New("atom without source path") - } - cmd += fmt.Sprintf(" %s", atom.SourcePath) - } - return r.shell.Run(ctx, cmd) } -func (r *resticRepository) getSnapshotID(ctx context.Context, backup Backup, ds Dataset) (string, error) { - data, err := r.shell.Output(ctx, fmt.Sprintf( +func (r *resticRepository) getSnapshotID(ctx context.Context, rctx RuntimeContext, backup *Backup, ds *Dataset) (string, error) { + data, err := rctx.Shell().Output(ctx, fmt.Sprintf( "%s snapshots --json --tag backup_id=%s --tag %s", r.resticCmd(), backup.ID, @@ -177,41 +179,53 @@ func (r *resticRepository) getSnapshotID(ctx context.Context, backup Backup, ds return snaps[0].ShortID, nil } -func (r 
*resticRepository) Restore(ctx context.Context, backup Backup, ds Dataset, target string) error { - snap, err := r.getSnapshotID(ctx, backup, ds) +func (r *resticRepository) RestoreCmd(ctx context.Context, rctx RuntimeContext, backup *Backup, ds *Dataset, paths []string, target string) (string, error) { + snap, err := r.getSnapshotID(ctx, rctx, backup, ds) if err != nil { - return err + return "", err + } + + cmd := []string{ + fmt.Sprintf("%s restore", r.resticCmd()), } - cmd := fmt.Sprintf( - "%s restore", + + for _, path := range paths { + cmd = append(cmd, fmt.Sprintf("--include %s", path)) + } + + cmd = append(cmd, fmt.Sprintf("--target %s", target)) + cmd = append(cmd, snap) + return strings.Join(cmd, " "), nil +} + +func (r *resticRepository) BackupStreamCmd(backup *Backup, ds *Dataset) string { + fakePath := fmt.Sprintf("/STDIN%s", strings.Replace(ds.Name, "/", "_", -1)) + return fmt.Sprintf( + "%s backup --cleanup-cache --exclude-caches --tag %s --tag backup_id=%s --stdin --stdin-filename %s", r.resticCmd(), + ds.Name, + backup.ID, + fakePath, ) - for _, atom := range ds.Atoms { - if atom.SourcePath != "" { - cmd += fmt.Sprintf(" --include %s", filepath.Join(atom.SourcePath)) - } - } - cmd += fmt.Sprintf(" --target %s", target) - cmd += fmt.Sprintf(" %s", snap) - return r.shell.Run(ctx, cmd) } -func (r *resticRepository) BackupStream(ctx context.Context, backup Backup, ds Dataset, input io.Reader) error { - // Try to do the obvious thing with naming. - name := ds.Name - if len(ds.Atoms) == 1 { - name = fmt.Sprintf("%s.%s", ds.Name, ds.Atoms[0].Name) +func (r *resticRepository) RestoreStreamCmd(ctx context.Context, rctx RuntimeContext, backup *Backup, ds *Dataset, target string) (string, error) { + snap, err := r.getSnapshotID(ctx, rctx, backup, ds) + if err != nil { + return "", err } - return r.shell.Run(ctx, fmt.Sprintf( - "%s backup --stdin --stdin-filename %s", - r.resticCmd(), - name, - )) -} -func (r *resticRepository) RestoreStream(_ context.Context, backup Backup, ds Dataset, target string, output io.Writer) error { - // TODO. - return nil + fakePath := fmt.Sprintf("/STDIN%s", strings.Replace(ds.Name, "/", "_", -1)) + targetPath := filepath.Base(fakePath) + + // Restore the file to a temporary directory, then pipe it. + return fmt.Sprintf( + "(%s restore --target %s %s 1>&2 && cat %s)", + r.resticCmd(), + target, + snap, + filepath.Join(target, targetPath), + ), nil } // Data about a snapshot, obtained from 'restic snapshots --json'. 
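
With this refactor the restic repository no longer runs restic itself: Init still goes through the shell, but BackupCmd, RestoreCmd, BackupStreamCmd and RestoreStreamCmd only build command lines, and executing them is left to the caller via RuntimeContext.Shell(). A minimal sketch of how a handler might drive the non-streaming path (the function name and wiring are illustrative only, not code introduced by this patch):

    // Sketch only: assumes package tabacco; backupFileDataset is a
    // hypothetical caller, not a function added by this series.
    func backupFileDataset(ctx context.Context, rctx RuntimeContext,
        repo Repository, backup *Backup, ds *Dataset, paths []string) error {
        // Init is expected to be safe to call before every backup.
        if err := repo.Init(ctx, rctx); err != nil {
            return err
        }
        // The repository only builds the restic command line; running it
        // goes through the shell held by the runtime context.
        return rctx.Shell().Run(ctx, repo.BackupCmd(backup, ds, paths))
    }
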
diff --git a/repository_restic_test.go b/repository_restic_test.go index 0df393f..e20095f 100644 --- a/repository_restic_test.go +++ b/repository_restic_test.go @@ -58,27 +58,33 @@ func TestRestic(t *testing.T) { "password": "testpass", }, } - handlerSpecs := []HandlerSpec{ - { + handlerSpecs := []*HandlerSpec{ + &HandlerSpec{ Name: "data", Type: "file", + }, + } + sourceSpecs := []*SourceSpec{ + &SourceSpec{ + Name: "source1", + Handler: "data", + Schedule: "@random_every 1h", Params: map[string]interface{}{ "path": filepath.Join(tmpdir, "data"), }, - }, - } - sourceSpecs := []SourceSpec{ - { - Name: "source1", - Handler: "data", - Atoms: []Atom{ - { - Name: "f1", - RelativePath: "file1", - }, - { - Name: "f2", - RelativePath: "file2", + Datasets: []*DatasetSpec{ + &DatasetSpec{ + Name: "files", + Atoms: []Atom{ + { + Name: "f1", + Path: "file1", + }, + { + Name: "f2", + Path: "file2", + }, + }, }, }, }, @@ -105,7 +111,7 @@ func TestRestic(t *testing.T) { } defer m.Close() - backup, err := m.Backup(context.TODO(), configMgr.getSourceSpecs()) + backup, err := m.Backup(context.TODO(), configMgr.getSourceSpecs()[0]) if err != nil { t.Fatal(err) } @@ -133,7 +139,7 @@ func TestRestic(t *testing.T) { // Now try to restore. err = m.Restore( context.TODO(), - FindRequest{Pattern: "source1/*"}, + &FindRequest{Pattern: "source1/*"}, tmpdir+"/restore", ) if err != nil { @@ -147,3 +153,108 @@ func TestRestic(t *testing.T) { t.Fatalf("data/file1 has bad restored contents: %s", string(data)) } } + +// nolint: gocyclo +func TestRestic_Stream(t *testing.T) { + // Check that we can actually run restic. + if err := checkResticVersion("restic"); err != nil { + t.Skip("can't run restic: ", err) + } + + store := &dummyMetadataStore{} + + tmpdir := createTempDirWithData(t) + defer os.RemoveAll(tmpdir) + + repoSpec := RepositorySpec{ + Name: "main", + Type: "restic", + Params: map[string]interface{}{ + "uri": tmpdir + "/repo", + "password": "testpass", + }, + } + handlerSpecs := []*HandlerSpec{ + &HandlerSpec{ + Name: "data", + Type: "pipe", + Params: map[string]interface{}{ + "backup_command": "echo data", + "restore_command": "read row ; test \"x$$row\" = xdata", + }, + }, + } + sourceSpecs := []*SourceSpec{ + &SourceSpec{ + Name: "source1", + Handler: "data", + Schedule: "@random_every 1h", + Datasets: []*DatasetSpec{ + &DatasetSpec{ + Name: "f1", + Atoms: []Atom{ + { + Name: "f1", + }, + }, + }, + }, + }, + } + queueSpec := jobs.QueueSpec{ + Workers: map[string]int{"backup": 2, "restore": 1}, + } + + // Run the backup. + configMgr, err := NewConfigManager(&Config{ + Queue: queueSpec, + Repository: repoSpec, + HandlerSpecs: handlerSpecs, + SourceSpecs: sourceSpecs, + }) + if err != nil { + t.Fatal(err) + } + defer configMgr.Close() + + m, err := NewManager(context.TODO(), configMgr, store) + if err != nil { + t.Fatal(err) + } + defer m.Close() + + backup, err := m.Backup(context.TODO(), configMgr.getSourceSpecs()[0]) + if err != nil { + t.Fatal(err) + } + if backup.ID == "" || backup.Host == "" { + t.Fatalf("empty fields in backup: %+v", backup) + } + + // Check the 'restic snapshots' output. 
+ output, err := exec.Command("env", "RESTIC_REPOSITORY=", "RESTIC_PASSWORD_FILE=", "RESTIC_PASSWORD=testpass", "restic", "-r", tmpdir+"/repo", "snapshots", "--json").Output() + if err != nil { + t.Fatalf("'restic snapshots' failed: %v", err) + } + snaps, err := parseResticSnapshots(output) + if err != nil { + t.Fatalf("parsing restic snaphots output: %v, output:\n%s", err, string(output)) + } + if len(snaps) != 1 { + t.Fatalf("wrong number of snapshots: %+v", snaps) + } + snap := snaps[0] + if len(snap.Tags) != 2 { + t.Fatalf("woops, bad number of tags: %+v", snap) + } + + // Now try to restore. + err = m.Restore( + context.TODO(), + &FindRequest{Pattern: "source1/*"}, + tmpdir+"/restore", + ) + if err != nil { + t.Fatal("Restore", err) + } +} diff --git a/shell.go b/shell.go index 8fda1ca..183c7a8 100644 --- a/shell.go +++ b/shell.go @@ -2,8 +2,6 @@ package tabacco import ( "context" - "fmt" - "io" "io/ioutil" "log" "os" @@ -108,60 +106,6 @@ func (s *Shell) command(ctx context.Context, arg string) *exec.Cmd { return c } -// RunStdoutPipe runs a command with a function connected to its -// standard output via a pipe. -func (s *Shell) RunStdoutPipe(ctx context.Context, arg string, fn func(io.Reader) error) error { - cmd := s.command(ctx, arg) - log.Printf("stdout_pipe: %s", arg) - stdout, err := cmd.StdoutPipe() - if err != nil { - return err - } - if err := cmd.Start(); err != nil { - return err - } - - // The error from fn takes precedence over the command exit - // status. We still need to call cmd.Wait() in any case. - ferr := fn(stdout) - if ferr != nil { - // By calling stdout.Close() early we're hoping that - // the child process gets a SIGPIPE. - stdout.Close() - } - werr := cmd.Wait() - - if ferr != nil { - return ferr - } - return werr -} - -// RunStdinPipe runs a command with a function connected to its -// standard input via a pipe. -func (s *Shell) RunStdinPipe(ctx context.Context, arg string, fn func(io.Writer) error) error { - cmd := s.command(ctx, arg) - log.Printf("stdin_pipe: %s", arg) - stdin, err := cmd.StdinPipe() - if err != nil { - return err - } - if err := cmd.Start(); err != nil { - return err - } - - // The error from fn takes precedence over the command exit - // status. We still need to call cmd.Wait() in any case. - ferr := fn(stdin) - stdin.Close() // nolint - werr := cmd.Wait() - - if ferr != nil { - return ferr - } - return werr -} - // Run a command. func (s *Shell) Run(ctx context.Context, arg string) error { c := s.command(ctx, arg) @@ -169,16 +113,6 @@ func (s *Shell) Run(ctx context.Context, arg string) error { return c.Run() } -// RunWithEnv runs a command with additional environment variables. -func (s *Shell) RunWithEnv(ctx context.Context, arg string, envMap map[string]string) error { - c := s.command(ctx, arg) - for k, v := range envMap { - c.Env = append(c.Env, fmt.Sprintf("%s=%s", k, v)) - } - c.Stdout = os.Stdout - return c.Run() -} - // Output runs a command and returns the standard output. func (s *Shell) Output(ctx context.Context, arg string) ([]byte, error) { return s.command(ctx, arg).Output() diff --git a/source.go b/source.go index 28edecf..c4fec3c 100644 --- a/source.go +++ b/source.go @@ -1,15 +1,71 @@ package tabacco import ( - "bufio" - "bytes" "context" "errors" "fmt" + "os" "os/exec" + "path/filepath" + "time" + + "gopkg.in/yaml.v2" ) -// SourceSpec defines the configuration for a data source. +// DatasetSpec describes a dataset in the configuration. 
+type DatasetSpec struct { + Name string `yaml:"name"` + + Atoms []Atom `yaml:"atoms"` + AtomsCommand string `yaml:"atoms_command"` +} + +// Parse a DatasetSpec and return a Dataset. +func (spec *DatasetSpec) Parse(ctx context.Context, src *SourceSpec) (*Dataset, error) { + // Build the atoms list, invoking the atoms_command if + // necessary, and creating actual atoms with absolute names. + name := filepath.Join(src.Name, spec.Name) + var atoms []Atom + for _, a := range spec.Atoms { + atoms = append(atoms, a.withPrefix(name)) + } + if spec.AtomsCommand != "" { + var cmdAtoms []Atom + if err := runYAMLCommand(ctx, spec.AtomsCommand, &cmdAtoms); err != nil { + return nil, fmt.Errorf("source %s: dataset %s: error in atoms command: %v", src.Name, spec.Name, err) + } + for _, a := range cmdAtoms { + atoms = append(atoms, a.withPrefix(name)) + } + } + + return &Dataset{ + Name: name, + Source: src.Name, + Atoms: atoms, + }, nil +} + +// Check syntactical validity of the DatasetSpec. +func (spec *DatasetSpec) Check() error { + if spec.Name == "" { + return errors.New("dataset name is not set") + } + if len(spec.Atoms) > 0 && spec.AtomsCommand != "" { + return errors.New("can't specify both 'atoms' and 'atoms_command'") + } + if len(spec.Atoms) == 0 && spec.AtomsCommand == "" { + return errors.New("must specify one of 'atoms' or 'atoms_command'") + } + return nil +} + +// SourceSpec defines the configuration for a data source. Data +// sources can dynamically or statically generate one or more +// Datasets, each containing one or more Atoms. +// +// Handlers are launched once per Dataset, and they know how to deal +// with backing up / restoring individual Atoms. type SourceSpec struct { Name string `yaml:"name"` Handler string `yaml:"handler"` @@ -17,93 +73,85 @@ type SourceSpec struct { // Schedule to run the backup on. Schedule string `yaml:"schedule"` - // Define atoms statically, or use a script to generate them + // Define Datasets statically, or use a script to generate them // dynamically on every new backup. - Atoms []Atom `yaml:"atoms"` - AtomsCommand string `yaml:"atoms_command"` + Datasets []*DatasetSpec `yaml:"datasets"` + DatasetsCommand string `yaml:"datasets_command"` - //Params map[string]interface{} `json:"params"` + // Commands to run before and after operations on the source. + PreBackupCommand string `yaml:"pre_backup_command"` + PostBackupCommand string `yaml:"post_backup_command"` + PreRestoreCommand string `yaml:"pre_restore_command"` + PostRestoreCommand string `yaml:"post_restore_command"` + + Params Params `yaml:"params"` + + // Timeout for execution of the entire backup operation. + Timeout time.Duration `yaml:"timeout"` } -// Parse a SourceSpec and return a Dataset instance. -func (spec *SourceSpec) Parse(ctx context.Context) (ds Dataset, err error) { - // Invoke the atoms_command if necessary. - atoms := spec.Atoms - if spec.AtomsCommand != "" { - atoms, err = runAtomsCommand(ctx, spec.AtomsCommand) - if err != nil { - return +// Parse a SourceSpec and return one or more Datasets. +func (spec *SourceSpec) Parse(ctx context.Context) ([]*Dataset, error) { + // Build the atoms list, invoking the atoms_command if + // necessary, and creating actual atoms with absolute names. + dspecs := append([]*DatasetSpec{}, spec.Datasets...) 
+ if spec.DatasetsCommand != "" { + var cmdSpecs []*DatasetSpec + if err := runYAMLCommand(ctx, spec.DatasetsCommand, &cmdSpecs); err != nil { + return nil, fmt.Errorf("error in datasets command: %v", err) } + dspecs = append(dspecs, cmdSpecs...) } - ds = normalizeDataset(Dataset{ - Name: spec.Name, - Handler: spec.Handler, - Atoms: atoms, - }) - return + // Call Parse on all datasets. + datasets := make([]*Dataset, 0, len(dspecs)) + for _, dspec := range dspecs { + ds, err := dspec.Parse(ctx, spec) + if err != nil { + return nil, fmt.Errorf("error parsing dataset %s: %v", dspec.Name, err) + } + datasets = append(datasets, ds) + } + return datasets, nil } -// Check that the configuration is valid. Not an alternative to +// Check syntactical validity of the SourceSpec. Not an alternative to // validation at usage time, but it provides an early warning to the // user. Checks the handler name against a string set of handler // names. -func (spec *SourceSpec) Check(handlers map[string]Handler) error { +func (spec *SourceSpec) Check(handlers map[string]*HandlerSpec) error { + if spec.Timeout == 0 { + spec.Timeout = 24 * time.Hour + } + if spec.Name == "" { - return errors.New("name is empty") + return errors.New("source name is not set") + } + if spec.Schedule == "" { + return errors.New("schedule is not set") + } + if spec.Handler == "" { + return errors.New("handler is not set") } if _, ok := handlers[spec.Handler]; !ok { return fmt.Errorf("unknown handler '%s'", spec.Handler) } - if len(spec.Atoms) > 0 && spec.AtomsCommand != "" { - return errors.New("can't specify both 'atoms' and 'atoms_command'") + if len(spec.Datasets) > 0 && spec.DatasetsCommand != "" { + return errors.New("can't specify both 'datasets' and 'datasets_command'") + } + if len(spec.Datasets) == 0 && spec.DatasetsCommand == "" { + return errors.New("must specify one of 'datasets' or 'datasets_command'") } return nil } -func runAtomsCommand(ctx context.Context, cmd string) ([]Atom, error) { +func runYAMLCommand(ctx context.Context, cmd string, obj interface{}) error { c := exec.Command("/bin/sh", "-c", cmd) // #nosec - stdout, err := c.StdoutPipe() + c.Stderr = os.Stderr + output, err := c.Output() if err != nil { - return nil, err - } - defer stdout.Close() // nolint: errcheck - if err := c.Start(); err != nil { - return nil, err - } - - var atoms []Atom - scanner := bufio.NewScanner(stdout) - for scanner.Scan() { - parts := bytes.Fields(scanner.Bytes()) - atom := Atom{Name: string(parts[0])} - if len(parts) == 2 { - atom.RelativePath = string(parts[1]) - } - atoms = append(atoms, atom) - } - return atoms, scanner.Err() -} - -func normalizeDataset(ds Dataset) Dataset { - // If the Dataset has no atoms, add an empty one. - if len(ds.Atoms) == 0 { - ds.Atoms = []Atom{Atom{}} - } - - // If there are multiple atoms, and some (or all) have empty - // RelativePaths, just set their RelativePath equal to their - // Name. 
- if len(ds.Atoms) > 1 { - var atoms []Atom - for _, atom := range ds.Atoms { - if atom.RelativePath == "" { - atom.RelativePath = atom.Name - } - atoms = append(atoms, atom) - } - ds.Atoms = atoms + return err } - return ds + return yaml.Unmarshal(output, obj) } diff --git a/testdata/sources/source.yml b/testdata/sources/source.yml index 5a5305a..f08b9ad 100644 --- a/testdata/sources/source.yml +++ b/testdata/sources/source.yml @@ -1,3 +1,8 @@ name: source1 handler: file schedule: "@random_every 2m" +params: + path: /usr/share/misc +datasets: + - name: magic + atoms: [{name: magic}] diff --git a/types.go b/types.go index be837c7..7fa7b4f 100644 --- a/types.go +++ b/types.go @@ -3,7 +3,7 @@ package tabacco import ( "context" "fmt" - "io" + "path/filepath" "regexp" "strings" "time" @@ -11,6 +11,18 @@ import ( "git.autistici.org/ai3/tools/tabacco/jobs" ) +// Params are configurable parameters in a format friendly to YAML +// representation. +type Params map[string]interface{} + +// Get a string value for a parameter. +func (p Params) Get(key string) string { + if s, ok := p[key].(string); ok { + return s + } + return "" +} + // Backup is the over-arching entity describing a high level backup // operation. Backups are initiated autonomously by individual hosts, // so each Backup belongs to a single Host. @@ -23,19 +35,28 @@ type Backup struct { // Host. Host string `json:"host"` + + // Datasets. + Datasets []*Dataset `json:"datasets"` } // An Atom is a bit of data that can be restored independently as part -// of a Dataset. +// of a Dataset. The atom Name is an absolute path in the global atom +// namespace, so it is prefixed with the container Dataset name. type Atom struct { - // Name (path-like, not rooted). + // Name (path-like). Name string `json:"name"` - // Relative path with respect to the Dataset. - RelativePath string `json:"rel_path"` + // Special attribute for the 'file' handler (path relative to + // source root path). + Path string `json:"path,omitempty"` +} - // Source path (used for restore). - SourcePath string `json:"source_path,omitempty"` +func (a Atom) withPrefix(pfx string) Atom { + return Atom{ + Name: filepath.Join(pfx, a.Name), + Path: a.Path, + } } // A Dataset describes a data set as a high level structure containing @@ -45,13 +66,13 @@ type Atom struct { // databases (the atom we're interested in), which we might want to // restore independently. type Dataset struct { - // Name of the dataset. Will be prepended to target storage + // Name of the dataset (path-like). Will be prepended to atom // paths. Name string `json:"name"` - // Handler specifies the dataset type (which handler to use to - // backup/restore it). - Handler string `json:"handler"` + // Source is the name of the source that created this Dataset, + // stored so that the restore knows what to do. + Source string `json:"source"` // Atoms that are part of this dataset. Atoms []Atom `json:"atoms"` @@ -74,49 +95,39 @@ func (req *FindRequest) matchPattern(s string) bool { return req.patternRx.MatchString(s) } -// A Version ties together a Dataset and a Backup. -type Version struct { - Dataset Dataset `json:"dataset"` - Backup Backup `json:"backup"` -} - // MetadataStore is the client interface to the global metadata store. type MetadataStore interface { // Find the datasets that match a specific criteria. Only // atoms matching the criteria will be included in the Dataset // objects in the response. 
- FindAtoms(context.Context, FindRequest) ([][]Version, error) + FindAtoms(context.Context, *FindRequest) ([]*Backup, error) // Add a dataset entry (the Backup might already exist). - AddDataset(context.Context, Backup, Dataset) error + AddDataset(context.Context, *Backup, *Dataset) error } // Handler can backup and restore a specific class of datasets. type Handler interface { - DatasetsForBackup(Dataset) []Dataset - DatasetsForRestore(Dataset) []Dataset - BackupJob(Repository, Backup, Dataset) jobs.Job - RestoreJob(Repository, Backup, Dataset, string) jobs.Job - Spec() HandlerSpec + BackupJob(RuntimeContext, Repository, *Backup, *Dataset) jobs.Job + RestoreJob(RuntimeContext, Repository, *Backup, *Dataset, string) jobs.Job } // Repository is the interface to a remote repository. type Repository interface { - Init(context.Context) error - Prepare(context.Context, Backup) error - Backup(context.Context, Backup, Dataset, string) error - Restore(context.Context, Backup, Dataset, string) error - BackupStream(context.Context, Backup, Dataset, io.Reader) error - RestoreStream(context.Context, Backup, Dataset, string, io.Writer) error + Init(context.Context, RuntimeContext) error + BackupCmd(*Backup, *Dataset, []string) string + RestoreCmd(context.Context, RuntimeContext, *Backup, *Dataset, []string, string) (string, error) + BackupStreamCmd(*Backup, *Dataset) string + RestoreStreamCmd(context.Context, RuntimeContext, *Backup, *Dataset, string) (string, error) Close() error } // Manager for backups and restores. type Manager interface { - BackupJob(context.Context, []SourceSpec) (Backup, jobs.Job, error) - Backup(context.Context, []SourceSpec) (Backup, error) - RestoreJob(context.Context, FindRequest, string) (jobs.Job, error) - Restore(context.Context, FindRequest, string) error + BackupJob(context.Context, *SourceSpec) (*Backup, jobs.Job, error) + Backup(context.Context, *SourceSpec) (*Backup, error) + RestoreJob(context.Context, *FindRequest, string) (jobs.Job, error) + Restore(context.Context, *FindRequest, string) error Close() error // Debug interface. -- GitLab From 1db91b003d68d4c760f93359933e8ae59f3795fd Mon Sep 17 00:00:00 2001 From: ale Date: Mon, 17 Jun 2019 23:39:01 +0100 Subject: [PATCH 02/12] Move the atom full paths completely within the metadata API Paths (for atoms and datasets) only exist within the API itself now. --- config.go | 8 +- config_test.go | 5 +- manager_test.go | 121 ++++++++++++------- metadb/migrations/1_initialize_schema.up.sql | 1 + metadb/migrations/bindata.go | 3 +- metadb/server/service.go | 18 ++- repository_restic.go | 11 +- source.go | 27 +++-- types.go | 13 +- 9 files changed, 130 insertions(+), 77 deletions(-) diff --git a/config.go b/config.go index 0a0c8aa..3e1b061 100644 --- a/config.go +++ b/config.go @@ -64,7 +64,13 @@ func (a *runtimeAssets) Shell() *Shell { } func buildHandlerMap(specs []*HandlerSpec) map[string]*HandlerSpec { - m := make(map[string]*HandlerSpec) + // Create a handler map with a default 'file' spec. 
+ m := map[string]*HandlerSpec{ + "file": &HandlerSpec{ + Name: "file", + Type: "file", + }, + } for _, h := range specs { m[h.Name] = h } diff --git a/config_test.go b/config_test.go index 5bf2e4b..2a28ce6 100644 --- a/config_test.go +++ b/config_test.go @@ -6,7 +6,6 @@ import ( "fmt" "log" "os" - "strings" "testing" "time" ) @@ -211,7 +210,9 @@ func checkTwoUserAccountsAtoms(ra *runtimeAssets, datasets []*Dataset) error { return errors.New("empty dataset name") } for _, atom := range ds.Atoms { - if !strings.HasPrefix(atom.Name, "users/") { + switch atom.Name { + case "account1", "account2": + default: return fmt.Errorf("bad atom name: %s", atom.Name) } numAtoms++ diff --git a/manager_test.go b/manager_test.go index 6f68b57..f1b769d 100644 --- a/manager_test.go +++ b/manager_test.go @@ -3,6 +3,7 @@ package tabacco import ( "context" "log" + "path/filepath" "testing" "time" @@ -12,16 +13,18 @@ import ( type dummyMetadataEntry struct { backupID string backupTS time.Time - name string dsName string host string source string + path string atom Atom } func (e dummyMetadataEntry) match(req *FindRequest) bool { - if req.Pattern != "" && !req.matchPattern(e.name) { - return false + if req.Pattern != "" { + if !req.matchPattern(e.path) { + return false + } } if req.Host != "" && req.Host != e.host { return false @@ -48,65 +51,93 @@ type dummyMetadataStore struct { log []dummyMetadataEntry } -func (d *dummyMetadataStore) FindAtoms(_ context.Context, req *FindRequest) ([]*Backup, error) { - tmp := make(map[string]map[string][]dummyMetadataEntry) - for _, l := range d.log { - if !l.match(req) { - continue +// Argh! This is copy&pasted from server/service.go, but with minor +// modifications due to the different types... terrible. +func keepNumVersions(dbAtoms []dummyMetadataEntry, numVersions int) []dummyMetadataEntry { + // numVersions == 0 is remapped to 1. + if numVersions < 1 { + numVersions = 1 + } + + count := 0 + tmp := make(map[string][]dummyMetadataEntry) + for _, a := range dbAtoms { + l := tmp[a.path] + if len(l) < numVersions { + l = append(l, a) + count++ } + tmp[a.path] = l + } + out := make([]dummyMetadataEntry, 0, count) + for _, l := range tmp { + out = append(out, l...) + } + return out +} + +func groupByBackup(dbAtoms []dummyMetadataEntry) []*Backup { + // As we scan through dbAtoms, aggregate into Backups and Datasets. + backups := make(map[string]*Backup) + dsm := make(map[string]map[string]*Dataset) - m, ok := tmp[l.name] + for _, atom := range dbAtoms { + // Create the Backup object if it does not exist. + b, ok := backups[atom.backupID] if !ok { - m = make(map[string][]dummyMetadataEntry) - tmp[l.name] = m + b = atom.toBackup() + backups[atom.backupID] = b } - m[l.backupID] = append(m[l.name], l) - } - - count := req.NumVersions - if count < 1 { - count = 1 - } - - // Accumulate output into Backups - btmp := make(map[string]*Backup) - for _, dsmap := range tmp { - for _, dslog := range dsmap { - ds := dslog[0].toDataset() - b := dslog[0].toBackup() - bb, ok := btmp[b.ID] - if !ok { - btmp[b.ID] = b - bb = b - } - bb.Datasets = append(bb.Datasets, ds) - - ds.Atoms = nil - if len(dslog) > count { - dslog = dslog[len(dslog)-count:] - } - for _, l := range dslog { - ds.Atoms = append(ds.Atoms, l.atom) - } + + // Create the Dataset object for this Backup in the + // two-level map (creating the intermediate map if + // necessary). + tmp, ok := dsm[atom.backupID] + if !ok { + tmp = make(map[string]*Dataset) + dsm[atom.backupID] = tmp } + // Match datasets by their full path. 
+ dsPath := filepath.Join(atom.source, atom.dsName) + ds, ok := tmp[dsPath] + if !ok { + ds = atom.toDataset() + tmp[dsPath] = ds + b.Datasets = append(b.Datasets, ds) + } + + // Finally, add the atom to the dataset. + ds.Atoms = append(ds.Atoms, atom.atom) } - var out []*Backup - for _, b := range btmp { + + out := make([]*Backup, 0, len(backups)) + for _, b := range backups { out = append(out, b) } - return out, nil + return out +} + +func (d *dummyMetadataStore) FindAtoms(_ context.Context, req *FindRequest) ([]*Backup, error) { + var tmp []dummyMetadataEntry + for _, l := range d.log { + if !l.match(req) { + continue + } + tmp = append(tmp, l) + } + + return groupByBackup(keepNumVersions(tmp, req.NumVersions)), nil } func (d *dummyMetadataStore) AddDataset(_ context.Context, backup *Backup, ds *Dataset) error { log.Printf("AddDataset: %+v", *ds) for _, atom := range ds.Atoms { - //name := fmt.Sprintf("%s/%s", ds.Name, atom.Name) - name := atom.Name + path := filepath.Join(ds.Source, ds.Name, atom.Name) d.log = append(d.log, dummyMetadataEntry{ backupID: backup.ID, backupTS: backup.Timestamp, host: backup.Host, - name: name, + path: path, dsName: ds.Name, source: ds.Source, atom: atom, diff --git a/metadb/migrations/1_initialize_schema.up.sql b/metadb/migrations/1_initialize_schema.up.sql index 97df028..02a304e 100644 --- a/metadb/migrations/1_initialize_schema.up.sql +++ b/metadb/migrations/1_initialize_schema.up.sql @@ -9,6 +9,7 @@ CREATE TABLE log ( dataset_name VARCHAR(128), dataset_source VARCHAR(128), atom_name VARCHAR(255), + atom_full_path VARCHAR(255), atom_path VARCHAR(255) ); diff --git a/metadb/migrations/bindata.go b/metadb/migrations/bindata.go index 320234c..2323a2f 100644 --- a/metadb/migrations/bindata.go +++ b/metadb/migrations/bindata.go @@ -79,6 +79,7 @@ CREATE TABLE log ( dataset_name VARCHAR(128), dataset_source VARCHAR(128), atom_name VARCHAR(255), + atom_full_path VARCHAR(255), atom_path VARCHAR(255) ); @@ -97,7 +98,7 @@ func _1_initialize_schemaUpSql() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "1_initialize_schema.up.sql", size: 539, mode: os.FileMode(420), modTime: time.Unix(1560765826, 0)} + info := bindataFileInfo{name: "1_initialize_schema.up.sql", size: 576, mode: os.FileMode(420), modTime: time.Unix(1560809647, 0)} a := &asset{bytes: bytes, info: info} return a, nil } diff --git a/metadb/server/service.go b/metadb/server/service.go index acbd0ab..b2e47bc 100644 --- a/metadb/server/service.go +++ b/metadb/server/service.go @@ -5,6 +5,7 @@ import ( "database/sql" "fmt" "log" + "path/filepath" "strings" "time" @@ -19,12 +20,19 @@ type dbAtom struct { DatasetName string DatasetSource string AtomName string + AtomFullPath string AtomPath string } func makeAtoms(backup tabacco.Backup, ds tabacco.Dataset) []dbAtom { var out []dbAtom for _, atom := range ds.Atoms { + + // It is here that we 'materialize' the concept of Atom names + // as paths, by concatenating source/dataset/atom and storing + // it as the atom name. 
+ path := filepath.Join(ds.Source, ds.Name, atom.Name) + out = append(out, dbAtom{ BackupID: backup.ID, BackupTimestamp: backup.Timestamp, @@ -33,6 +41,7 @@ func makeAtoms(backup tabacco.Backup, ds tabacco.Dataset) []dbAtom { DatasetSource: ds.Source, AtomName: atom.Name, AtomPath: atom.Path, + AtomFullPath: path, }) } return out @@ -69,12 +78,12 @@ func keepNumVersions(dbAtoms []*dbAtom, numVersions int) []*dbAtom { count := 0 tmp := make(map[string][]*dbAtom) for _, a := range dbAtoms { - l := tmp[a.AtomName] + l := tmp[a.AtomFullPath] if len(l) < numVersions { l = append(l, a) count++ } - tmp[a.AtomName] = l + tmp[a.AtomFullPath] = l } out := make([]*dbAtom, 0, count) for _, l := range tmp { @@ -153,9 +162,9 @@ var statements = map[string]string{ INSERT INTO log ( backup_id, backup_timestamp, backup_host, dataset_name, dataset_source, - atom_name, atom_path + atom_name, atom_path, atom_full_path ) VALUES ( - ?, ?, ?, ?, ?, ?, ? + ?, ?, ?, ?, ?, ?, ?, ? ) `, } @@ -178,6 +187,7 @@ func (s *Service) AddDataset(ctx context.Context, backup tabacco.Backup, ds taba dbAtom.DatasetSource, dbAtom.AtomName, dbAtom.AtomPath, + dbAtom.AtomFullPath, ); err != nil { return err } diff --git a/repository_restic.go b/repository_restic.go index 8f9e8e0..fd1fe47 100644 --- a/repository_restic.go +++ b/repository_restic.go @@ -198,8 +198,15 @@ func (r *resticRepository) RestoreCmd(ctx context.Context, rctx RuntimeContext, return strings.Join(cmd, " "), nil } +// A special path for stdin datasets that is likely to be unused by the +// rest of the filesystem (the path namespace in Restic is global). +func datasetStdinPath(ds *Dataset) string { + dsPath := filepath.Join(ds.Source, ds.Name) + return fmt.Sprintf("/STDIN_%s", strings.Replace(dsPath, "/", "_", -1)) +} + func (r *resticRepository) BackupStreamCmd(backup *Backup, ds *Dataset) string { - fakePath := fmt.Sprintf("/STDIN%s", strings.Replace(ds.Name, "/", "_", -1)) + fakePath := datasetStdinPath(ds) return fmt.Sprintf( "%s backup --cleanup-cache --exclude-caches --tag %s --tag backup_id=%s --stdin --stdin-filename %s", r.resticCmd(), @@ -215,7 +222,7 @@ func (r *resticRepository) RestoreStreamCmd(ctx context.Context, rctx RuntimeCon return "", err } - fakePath := fmt.Sprintf("/STDIN%s", strings.Replace(ds.Name, "/", "_", -1)) + fakePath := datasetStdinPath(ds) targetPath := filepath.Base(fakePath) // Restore the file to a temporary directory, then pipe it. diff --git a/source.go b/source.go index c4fec3c..efe8bda 100644 --- a/source.go +++ b/source.go @@ -6,9 +6,9 @@ import ( "fmt" "os" "os/exec" - "path/filepath" "time" + "git.autistici.org/ai3/tools/tabacco/util" "gopkg.in/yaml.v2" ) @@ -22,25 +22,19 @@ type DatasetSpec struct { // Parse a DatasetSpec and return a Dataset. func (spec *DatasetSpec) Parse(ctx context.Context, src *SourceSpec) (*Dataset, error) { - // Build the atoms list, invoking the atoms_command if - // necessary, and creating actual atoms with absolute names. - name := filepath.Join(src.Name, spec.Name) + // Build the atoms list, invoking the atoms_command if necessary. var atoms []Atom - for _, a := range spec.Atoms { - atoms = append(atoms, a.withPrefix(name)) - } + atoms = append(atoms, spec.Atoms...) 
if spec.AtomsCommand != "" { var cmdAtoms []Atom if err := runYAMLCommand(ctx, spec.AtomsCommand, &cmdAtoms); err != nil { return nil, fmt.Errorf("source %s: dataset %s: error in atoms command: %v", src.Name, spec.Name, err) } - for _, a := range cmdAtoms { - atoms = append(atoms, a.withPrefix(name)) - } + atoms = append(atoms, cmdAtoms...) } return &Dataset{ - Name: name, + Name: spec.Name, Source: src.Name, Atoms: atoms, }, nil @@ -142,7 +136,16 @@ func (spec *SourceSpec) Check(handlers map[string]*HandlerSpec) error { if len(spec.Datasets) == 0 && spec.DatasetsCommand == "" { return errors.New("must specify one of 'datasets' or 'datasets_command'") } - return nil + + // Check the datasets, at least those that are provided + // statically. + merr := new(util.MultiError) + for _, ds := range spec.Datasets { + if err := ds.Check(); err != nil { + merr.Add(fmt.Errorf("dataset %s: %v", ds.Name, err)) + } + } + return merr.OrNil() } func runYAMLCommand(ctx context.Context, cmd string, obj interface{}) error { diff --git a/types.go b/types.go index 7fa7b4f..699f60e 100644 --- a/types.go +++ b/types.go @@ -3,7 +3,6 @@ package tabacco import ( "context" "fmt" - "path/filepath" "regexp" "strings" "time" @@ -41,8 +40,9 @@ type Backup struct { } // An Atom is a bit of data that can be restored independently as part -// of a Dataset. The atom Name is an absolute path in the global atom -// namespace, so it is prefixed with the container Dataset name. +// of a Dataset. Atoms are identified uniquely by their absolute path +// in the global atom namespace: this path is built by concatenating +// the source name, the dataset name, and the atom name. type Atom struct { // Name (path-like). Name string `json:"name"` @@ -52,13 +52,6 @@ type Atom struct { Path string `json:"path,omitempty"` } -func (a Atom) withPrefix(pfx string) Atom { - return Atom{ - Name: filepath.Join(pfx, a.Name), - Path: a.Path, - } -} - // A Dataset describes a data set as a high level structure containing // one or more atoms. The 1-to-many scenario is justified by the // following use case: imagine a sql database server, we may want to -- GitLab From 7f59ad7f426573bdfc962376d3483badc9d0365d Mon Sep 17 00:00:00 2001 From: ale Date: Mon, 17 Jun 2019 23:49:05 +0100 Subject: [PATCH 03/12] Do not fail if path is not set on the file source It may be set on the atom instead. --- handler_file.go | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/handler_file.go b/handler_file.go index 3fa6d54..4fe2073 100644 --- a/handler_file.go +++ b/handler_file.go @@ -2,7 +2,6 @@ package tabacco import ( "context" - "errors" "path/filepath" "git.autistici.org/ai3/tools/tabacco/jobs" @@ -13,11 +12,7 @@ type fileHandler struct { } func newFileHandler(name string, params Params) (Handler, error) { - path := params.Get("path") - if path == "" { - return nil, errors.New("path not set") - } - return &fileHandler{path: path}, nil + return &fileHandler{path: params.Get("path")}, nil } // Convert the atom to a path. -- GitLab From 46dcd9eacb93a473bb9587910284ac159a2e98ef Mon Sep 17 00:00:00 2001 From: ale Date: Tue, 18 Jun 2019 00:14:21 +0100 Subject: [PATCH 04/12] Add support for compression Use LZ4 by default. 
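
Compression is driven entirely by the pipe handler Params: "compress" turns it
on, while "compress_command" and "decompress_command" override the lz4 defaults
("lz4c -3z - -" and "lz4c -d - -"). A minimal sketch of a handler spec with
compression enabled, mirroring the test configuration (the backup/restore
commands are placeholder values, not a real pipeline):

    handlerSpecs := []*HandlerSpec{
        &HandlerSpec{
            Name: "data",
            Type: "pipe",
            Params: map[string]interface{}{
                "backup_command":  "echo data",
                "restore_command": "read row ; test \"x$$row\" = xdata",
                // Enable compression; the two commands below are the
                // defaults and could be omitted.
                "compress":           true,
                "compress_command":   "lz4c -3z - -",
                "decompress_command": "lz4c -d - -",
            },
        },
    }
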
--- handler_pipe.go | 56 ++++++++++-- repository_restic_test.go | 186 ++++++++++++++++---------------------- types.go | 16 ++++ 3 files changed, 143 insertions(+), 115 deletions(-) diff --git a/handler_pipe.go b/handler_pipe.go index e1e3fde..0f1c298 100644 --- a/handler_pipe.go +++ b/handler_pipe.go @@ -14,9 +14,19 @@ import ( // generates a single file on the repository, and thus it can't // distinguish multiple atoms inside it. type pipeHandler struct { - backupCmd, restoreCmd string + backupCmd string + restoreCmd string + compress bool + compressCmd string + decompressCmd string } +const ( + defaultCompress = false + defaultCompressCmd = "lz4c -3z - -" + defaultDecompressCmd = "lz4c -d - -" +) + func newPipeHandler(name string, params Params) (Handler, error) { backupCmd := params.Get("backup_command") if backupCmd == "" { @@ -28,16 +38,33 @@ func newPipeHandler(name string, params Params) (Handler, error) { return nil, errors.New("restore_command not set") } - return &pipeHandler{ - backupCmd: backupCmd, - restoreCmd: restoreCmd, - }, nil + // Create the pipeHandler with defaults, which can be + // overriden from Params. + h := &pipeHandler{ + backupCmd: backupCmd, + restoreCmd: restoreCmd, + compress: defaultCompress, + compressCmd: defaultCompressCmd, + decompressCmd: defaultDecompressCmd, + } + if b, ok := params.GetBool("compress"); ok { + h.compress = b + } + if s := params.Get("compress_command"); s != "" { + h.compressCmd = s + } + if s := params.Get("decompress_command"); s != "" { + h.decompressCmd = s + } + + return h, nil } func (h *pipeHandler) BackupJob(rctx RuntimeContext, repo Repository, backup *Backup, ds *Dataset) jobs.Job { cmd := fmt.Sprintf( - "(%s) | %s", + "(%s)%s | %s", expandVars(h.backupCmd, backup, ds), + h.compressSuffix(), repo.BackupStreamCmd(backup, ds), ) return jobs.JobFunc(func(ctx context.Context) error { @@ -52,14 +79,29 @@ func (h *pipeHandler) RestoreJob(rctx RuntimeContext, repo Repository, backup *B return err } cmd := fmt.Sprintf( - "%s | (%s)", + "%s | %s(%s)", restoreCmd, + h.decompressPrefix(), expandVars(h.restoreCmd, backup, ds), ) return rctx.Shell().Run(ctx, cmd) }) } +func (h *pipeHandler) compressSuffix() string { + if !h.compress { + return "" + } + return fmt.Sprintf(" | %s", h.compressCmd) +} + +func (h *pipeHandler) decompressPrefix() string { + if !h.compress { + return "" + } + return fmt.Sprintf("%s | ", h.decompressCmd) +} + func expandVars(s string, backup *Backup, ds *Dataset) string { return os.Expand(s, func(key string) string { switch key { diff --git a/repository_restic_test.go b/repository_restic_test.go index e20095f..f08b421 100644 --- a/repository_restic_test.go +++ b/repository_restic_test.go @@ -39,7 +39,7 @@ func createTempDirWithData(t *testing.T) string { } // nolint: gocyclo -func TestRestic(t *testing.T) { +func runResticTest(t *testing.T, tmpdir string, source *SourceSpec, restorePattern string, checkFn func(testing.TB, string)) { // Check that we can actually run restic. if err := checkResticVersion("restic"); err != nil { t.Skip("can't run restic: ", err) @@ -47,9 +47,6 @@ func TestRestic(t *testing.T) { store := &dummyMetadataStore{} - tmpdir := createTempDirWithData(t) - defer os.RemoveAll(tmpdir) - repoSpec := RepositorySpec{ Name: "main", Type: "restic", @@ -59,39 +56,21 @@ func TestRestic(t *testing.T) { }, } handlerSpecs := []*HandlerSpec{ + // 'file' is predefined. 
&HandlerSpec{ Name: "data", - Type: "file", - }, - } - sourceSpecs := []*SourceSpec{ - &SourceSpec{ - Name: "source1", - Handler: "data", - Schedule: "@random_every 1h", + Type: "pipe", Params: map[string]interface{}{ - "path": filepath.Join(tmpdir, "data"), - }, - Datasets: []*DatasetSpec{ - &DatasetSpec{ - Name: "files", - Atoms: []Atom{ - { - Name: "f1", - Path: "file1", - }, - { - Name: "f2", - Path: "file2", - }, - }, - }, + "backup_command": "echo data", + // The restore command also verifies the data. + "restore_command": "read row ; test \"x$$row\" = xdata", }, }, } queueSpec := jobs.QueueSpec{ Workers: map[string]int{"backup": 2, "restore": 1}, } + sourceSpecs := []*SourceSpec{source} // Run the backup. configMgr, err := NewConfigManager(&Config{ @@ -139,12 +118,19 @@ func TestRestic(t *testing.T) { // Now try to restore. err = m.Restore( context.TODO(), - &FindRequest{Pattern: "source1/*"}, + &FindRequest{Pattern: restorePattern}, tmpdir+"/restore", ) if err != nil { t.Fatal("Restore", err) } + + if checkFn != nil { + checkFn(t, tmpdir) + } +} + +func checkRestoredData(t testing.TB, tmpdir string) { data, err := ioutil.ReadFile(filepath.Join(tmpdir, "restore", tmpdir, "data", "file1")) if err != nil { t.Fatalf("data/file1 has not been restored: %v", err) @@ -154,37 +140,46 @@ func TestRestic(t *testing.T) { } } -// nolint: gocyclo -func TestRestic_Stream(t *testing.T) { - // Check that we can actually run restic. - if err := checkResticVersion("restic"); err != nil { - t.Skip("can't run restic: ", err) - } - - store := &dummyMetadataStore{} - +func TestRestic(t *testing.T) { tmpdir := createTempDirWithData(t) defer os.RemoveAll(tmpdir) - repoSpec := RepositorySpec{ - Name: "main", - Type: "restic", - Params: map[string]interface{}{ - "uri": tmpdir + "/repo", - "password": "testpass", - }, - } - handlerSpecs := []*HandlerSpec{ - &HandlerSpec{ - Name: "data", - Type: "pipe", + runResticTest( + t, tmpdir, + &SourceSpec{ + Name: "source1", + Handler: "file", + Schedule: "@random_every 1h", Params: map[string]interface{}{ - "backup_command": "echo data", - "restore_command": "read row ; test \"x$$row\" = xdata", + "path": filepath.Join(tmpdir, "data"), + }, + Datasets: []*DatasetSpec{ + &DatasetSpec{ + Name: "files", + Atoms: []Atom{ + { + Name: "f1", + Path: "file1", + }, + { + Name: "f2", + Path: "file2", + }, + }, + }, }, }, - } - sourceSpecs := []*SourceSpec{ + "source1/*", + checkRestoredData, + ) +} + +func TestRestic_Stream(t *testing.T) { + tmpdir := createTempDirWithData(t) + defer os.RemoveAll(tmpdir) + + runResticTest( + t, tmpdir, &SourceSpec{ Name: "source1", Handler: "data", @@ -200,61 +195,36 @@ func TestRestic_Stream(t *testing.T) { }, }, }, - } - queueSpec := jobs.QueueSpec{ - Workers: map[string]int{"backup": 2, "restore": 1}, - } - - // Run the backup. - configMgr, err := NewConfigManager(&Config{ - Queue: queueSpec, - Repository: repoSpec, - HandlerSpecs: handlerSpecs, - SourceSpecs: sourceSpecs, - }) - if err != nil { - t.Fatal(err) - } - defer configMgr.Close() - - m, err := NewManager(context.TODO(), configMgr, store) - if err != nil { - t.Fatal(err) - } - defer m.Close() - - backup, err := m.Backup(context.TODO(), configMgr.getSourceSpecs()[0]) - if err != nil { - t.Fatal(err) - } - if backup.ID == "" || backup.Host == "" { - t.Fatalf("empty fields in backup: %+v", backup) - } + "source1/*", + nil, + ) +} - // Check the 'restic snapshots' output. 
- output, err := exec.Command("env", "RESTIC_REPOSITORY=", "RESTIC_PASSWORD_FILE=", "RESTIC_PASSWORD=testpass", "restic", "-r", tmpdir+"/repo", "snapshots", "--json").Output() - if err != nil { - t.Fatalf("'restic snapshots' failed: %v", err) - } - snaps, err := parseResticSnapshots(output) - if err != nil { - t.Fatalf("parsing restic snaphots output: %v, output:\n%s", err, string(output)) - } - if len(snaps) != 1 { - t.Fatalf("wrong number of snapshots: %+v", snaps) - } - snap := snaps[0] - if len(snap.Tags) != 2 { - t.Fatalf("woops, bad number of tags: %+v", snap) - } +func TestRestic_Stream_Compress(t *testing.T) { + tmpdir := createTempDirWithData(t) + defer os.RemoveAll(tmpdir) - // Now try to restore. - err = m.Restore( - context.TODO(), - &FindRequest{Pattern: "source1/*"}, - tmpdir+"/restore", + runResticTest( + t, tmpdir, + &SourceSpec{ + Name: "source1", + Handler: "data", + Schedule: "@random_every 1h", + Datasets: []*DatasetSpec{ + &DatasetSpec{ + Name: "f1", + Atoms: []Atom{ + { + Name: "f1", + }, + }, + }, + }, + Params: map[string]interface{}{ + "compress": true, + }, + }, + "source1/*", + nil, ) - if err != nil { - t.Fatal("Restore", err) - } } diff --git a/types.go b/types.go index 699f60e..5ed33a2 100644 --- a/types.go +++ b/types.go @@ -22,6 +22,22 @@ func (p Params) Get(key string) string { return "" } +// GetBool returns a boolean value for a parameter (may be a string). +// Returns value and presence. +func (p Params) GetBool(key string) (bool, bool) { + if b, ok := p[key].(bool); ok { + return b, true + } + if s, ok := p[key].(string); ok { + switch strings.ToLower(s) { + case "on", "yes", "true", "1": + return true, true + } + return false, true + } + return false, false +} + // Backup is the over-arching entity describing a high level backup // operation. Backups are initiated autonomously by individual hosts, // so each Backup belongs to a single Host. -- GitLab From c42a63493ae3b05ebf974e480d63fc4d21566b4f Mon Sep 17 00:00:00 2001 From: ale Date: Tue, 18 Jun 2019 11:03:45 +0100 Subject: [PATCH 05/12] Enforce source name uniqueness in config --- config.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/config.go b/config.go index 3e1b061..20bed42 100644 --- a/config.go +++ b/config.go @@ -96,13 +96,20 @@ func (c *Config) parse() (*runtimeAssets, error) { // Validate the sources (Parse is called later at runtime). // Sources that fail the check are removed from the - // SourceSpecs array. + // SourceSpecs array. We also check that sources have unique + // names. + tmp := make(map[string]struct{}{}) var srcs []*SourceSpec for _, spec := range c.SourceSpecs { if err := spec.Check(handlerMap); err != nil { merr.Add(fmt.Errorf("source %s: %v", spec.Name, err)) continue } + if _, ok := tmp[spec.Name]; ok { + merr.Add(fmt.Errorf("duplicated source %s", spec.Name)) + continue + } + tmp[spec.Name] = struct{}{} srcs = append(srcs, spec) } c.SourceSpecs = srcs -- GitLab From f01658f1d7f95bf9809e1e4608b04972258248a4 Mon Sep 17 00:00:00 2001 From: ale Date: Tue, 18 Jun 2019 11:04:13 +0100 Subject: [PATCH 06/12] Fix typo --- config.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.go b/config.go index 20bed42..0454d29 100644 --- a/config.go +++ b/config.go @@ -98,7 +98,7 @@ func (c *Config) parse() (*runtimeAssets, error) { // Sources that fail the check are removed from the // SourceSpecs array. We also check that sources have unique // names. 
- tmp := make(map[string]struct{}{}) + tmp := make(map[string]struct{}) var srcs []*SourceSpec for _, spec := range c.SourceSpecs { if err := spec.Check(handlerMap); err != nil { -- GitLab From ad361dfaa8294a4e4d43beb5cdf8812641a87dcf Mon Sep 17 00:00:00 2001 From: ale Date: Tue, 18 Jun 2019 12:47:13 +0100 Subject: [PATCH 07/12] Add a few locks to prevent data races --- config.go | 2 ++ manager_test.go | 8 ++++++++ repository_restic.go | 29 ++++++++++++++--------------- 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/config.go b/config.go index 0454d29..a424e76 100644 --- a/config.go +++ b/config.go @@ -269,12 +269,14 @@ func NewConfigManager(config *Config) (*ConfigManager, error) { } go func() { for range m.notifyCh { + m.mx.Lock() for _, lch := range m.listeners { select { case lch <- struct{}{}: default: } } + m.mx.Unlock() } }() return m, nil diff --git a/manager_test.go b/manager_test.go index f1b769d..52e1c0c 100644 --- a/manager_test.go +++ b/manager_test.go @@ -4,6 +4,7 @@ import ( "context" "log" "path/filepath" + "sync" "testing" "time" @@ -48,6 +49,7 @@ func (e dummyMetadataEntry) toBackup() *Backup { } type dummyMetadataStore struct { + mx sync.Mutex log []dummyMetadataEntry } @@ -118,6 +120,9 @@ func groupByBackup(dbAtoms []dummyMetadataEntry) []*Backup { } func (d *dummyMetadataStore) FindAtoms(_ context.Context, req *FindRequest) ([]*Backup, error) { + d.mx.Lock() + defer d.mx.Unlock() + var tmp []dummyMetadataEntry for _, l := range d.log { if !l.match(req) { @@ -130,6 +135,9 @@ func (d *dummyMetadataStore) FindAtoms(_ context.Context, req *FindRequest) ([]* } func (d *dummyMetadataStore) AddDataset(_ context.Context, backup *Backup, ds *Dataset) error { + d.mx.Lock() + defer d.mx.Unlock() + log.Printf("AddDataset: %+v", *ds) for _, atom := range ds.Atoms { path := filepath.Join(ds.Source, ds.Name, atom.Name) diff --git a/repository_restic.go b/repository_restic.go index fd1fe47..0d1ad3c 100644 --- a/repository_restic.go +++ b/repository_restic.go @@ -7,11 +7,13 @@ import ( "fmt" "io" "io/ioutil" + "log" "os" "os/exec" "path/filepath" "regexp" "strings" + "sync" "time" "github.com/hashicorp/go-version" @@ -25,7 +27,7 @@ type resticRepository struct { excludeFiles []string autoPrune bool - initialized bool + initialized sync.Once } func (r *resticRepository) resticCmd() string { @@ -122,20 +124,17 @@ func (r *resticRepository) Close() error { } func (r *resticRepository) Init(ctx context.Context, rctx RuntimeContext) error { - if r.initialized { - return nil - } - - // Restic init will fail the second time we run it, ignore - // errors. - err := rctx.Shell().Run(ctx, fmt.Sprintf( - "%s init --quiet || true", - r.resticCmd(), - )) - if err == nil { - r.initialized = true - } - return err + r.initialized.Do(func() { + // Restic init will fail if the repository is already + // initialized, ignore errors (but log them). + if err := rctx.Shell().Run(ctx, fmt.Sprintf( + "%s init --quiet || true", + r.resticCmd(), + )); err != nil { + log.Printf("restic repository init failed (likely harmless): %v", err) + } + }) + return nil } func (r *resticRepository) Prepare(ctx context.Context, rctx RuntimeContext, backup *Backup) error { -- GitLab From 6a65647cb92ae9e46beae5854e064fc8beca0039 Mon Sep 17 00:00:00 2001 From: ale Date: Wed, 19 Jun 2019 08:58:34 +0100 Subject: [PATCH 08/12] Remove the name from Dataset and replace it with an id This makes it easier to generate application-level atom paths that are manageable. Datasets are given unique IDs at generation time. 
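
Concretely, a Dataset now gets an opaque ID from util.RandomID() when its spec
is parsed, and the user-visible path of an atom becomes source/atom
(filepath.Join of the source name and the atom name), which is also what
restore patterns match against. A small illustrative sketch (the values are
made up, only the shape matters):

    // Sketch only: shows the naming scheme, not code added by this patch.
    func exampleDatasetNaming() {
        ds := &Dataset{
            ID:     util.RandomID(), // opaque, unique per backup run
            Source: "source1",
            Atoms:  []Atom{{Name: "user2"}},
        }
        // The metadata store indexes the atom under
        // filepath.Join(ds.Source, atom.Name), i.e. "source1/user2",
        // which is the pattern a FindRequest would use to restore it.
        fmt.Println(filepath.Join(ds.Source, ds.Atoms[0].Name))
    }
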
--- agent_test.go | 3 +-- config_test.go | 9 +++----- handler_pipe.go | 2 -- manager.go | 2 +- manager_test.go | 21 ++++++++----------- .../migrations/1_initialize_schema.down.sql | 2 +- metadb/migrations/1_initialize_schema.up.sql | 6 +++--- metadb/migrations/bindata.go | 12 +++++------ metadb/server/service.go | 20 +++++++++--------- metadb/server/service_test.go | 1 - repository_restic.go | 21 ++++++++++--------- repository_restic_test.go | 3 --- source.go | 13 +++++------- testdata/sources/source.yml | 3 +-- types.go | 3 ++- 15 files changed, 53 insertions(+), 68 deletions(-) diff --git a/agent_test.go b/agent_test.go index 4daca34..f579f4f 100644 --- a/agent_test.go +++ b/agent_test.go @@ -36,12 +36,11 @@ func (m *fakeManager) GetStatus() ([]jobs.Status, []jobs.Status, []jobs.Status) func TestMakeSchedule(t *testing.T) { sourceSpecs := []*SourceSpec{ &SourceSpec{ - Name: "source1", + Name: "source1/users", Handler: "file1", Schedule: "@random_every 1d", Datasets: []*DatasetSpec{ &DatasetSpec{ - Name: "users", Atoms: []Atom{ { Name: "user1", diff --git a/config_test.go b/config_test.go index 2a28ce6..d516c7f 100644 --- a/config_test.go +++ b/config_test.go @@ -83,13 +83,11 @@ func TestConfig_Parse(t *testing.T) { Schedule: "@random_every 24h", Datasets: []*DatasetSpec{ &DatasetSpec{ - Name: "account1", Atoms: []Atom{ {Name: "account1"}, }, }, { - Name: "account2", Atoms: []Atom{ {Name: "account2"}, }, @@ -117,7 +115,6 @@ func TestConfig_Parse(t *testing.T) { Schedule: "@random_every 24h", Datasets: []*DatasetSpec{ &DatasetSpec{ - Name: "users", Atoms: []Atom{ {Name: "account1"}, {Name: "account2"}, @@ -144,7 +141,7 @@ func TestConfig_Parse(t *testing.T) { Name: "users", Handler: "file", Schedule: "@random_every 24h", - DatasetsCommand: "echo '[{name: account1, atoms: [{name: account1}, {name: account2}]}]'", + DatasetsCommand: "echo '[{atoms: [{name: account1}, {name: account2}]}]'", }, }, HandlerSpecs: []*HandlerSpec{ @@ -206,8 +203,8 @@ func parseAllSources(ra *runtimeAssets, specs []*SourceSpec) ([]*Dataset, error) func checkTwoUserAccountsAtoms(ra *runtimeAssets, datasets []*Dataset) error { var numAtoms int for _, ds := range datasets { - if ds.Name == "" { - return errors.New("empty dataset name") + if ds.ID == "" { + return errors.New("empty dataset ID") } for _, atom := range ds.Atoms { switch atom.Name { diff --git a/handler_pipe.go b/handler_pipe.go index 0f1c298..5f5c174 100644 --- a/handler_pipe.go +++ b/handler_pipe.go @@ -109,8 +109,6 @@ func expandVars(s string, backup *Backup, ds *Dataset) string { return key case "backup.id": return backup.ID - case "ds.name": - return ds.Name case "atom.names": names := make([]string, 0, len(ds.Atoms)) for _, a := range ds.Atoms { diff --git a/manager.go b/manager.go index 651cc27..41fbe24 100644 --- a/manager.go +++ b/manager.go @@ -63,7 +63,7 @@ func (j *metadataJob) RunContext(ctx context.Context) error { err := j.Job.RunContext(ctx) if err == nil { if merr := j.ms.AddDataset(ctx, j.backup, j.ds); merr != nil { - log.Printf("%s: error saving metadata: %v", j.ds.Name, merr) + log.Printf("%s@%s: error saving metadata: %v", j.ds.Source, j.ds.ID, merr) } } return err diff --git a/manager_test.go b/manager_test.go index 52e1c0c..3150540 100644 --- a/manager_test.go +++ b/manager_test.go @@ -14,7 +14,7 @@ import ( type dummyMetadataEntry struct { backupID string backupTS time.Time - dsName string + dsID string host string source string path string @@ -35,7 +35,7 @@ func (e dummyMetadataEntry) match(req *FindRequest) bool { func (e 
dummyMetadataEntry) toDataset() *Dataset { return &Dataset{ - Name: e.dsName, + ID: e.dsID, Source: e.source, } } @@ -99,12 +99,11 @@ func groupByBackup(dbAtoms []dummyMetadataEntry) []*Backup { tmp = make(map[string]*Dataset) dsm[atom.backupID] = tmp } - // Match datasets by their full path. - dsPath := filepath.Join(atom.source, atom.dsName) - ds, ok := tmp[dsPath] + // Match datasets by their unique ID. + ds, ok := tmp[atom.dsID] if !ok { ds = atom.toDataset() - tmp[dsPath] = ds + tmp[atom.dsID] = ds b.Datasets = append(b.Datasets, ds) } @@ -140,13 +139,13 @@ func (d *dummyMetadataStore) AddDataset(_ context.Context, backup *Backup, ds *D log.Printf("AddDataset: %+v", *ds) for _, atom := range ds.Atoms { - path := filepath.Join(ds.Source, ds.Name, atom.Name) + path := filepath.Join(ds.Source, atom.Name) d.log = append(d.log, dummyMetadataEntry{ backupID: backup.ID, backupTS: backup.Timestamp, host: backup.Host, path: path, - dsName: ds.Name, + dsID: ds.ID, source: ds.Source, atom: atom, }) @@ -189,11 +188,9 @@ func TestManager_Backup(t *testing.T) { Schedule: "@random_every 1h", Datasets: []*DatasetSpec{ &DatasetSpec{ - Name: "user1", Atoms: []Atom{{Name: "user1"}}, }, &DatasetSpec{ - Name: "user2", Atoms: []Atom{{Name: "user2"}}, }, }, @@ -252,11 +249,11 @@ func TestManager_Backup(t *testing.T) { } // A pattern matching a single atom. - resp, err = store.FindAtoms(context.TODO(), &FindRequest{Pattern: "source1/user2/user2"}) + resp, err = store.FindAtoms(context.TODO(), &FindRequest{Pattern: "source1/user2"}) if err != nil { t.Fatal("FindAtoms", err) } if len(resp) != 1 { - t.Fatalf("bad FindAtoms(source1/user2/user2) response: %+v", resp) + t.Fatalf("bad FindAtoms(source1/user2) response: %+v", resp) } } diff --git a/metadb/migrations/1_initialize_schema.down.sql b/metadb/migrations/1_initialize_schema.down.sql index 2242f00..698b5d0 100644 --- a/metadb/migrations/1_initialize_schema.down.sql +++ b/metadb/migrations/1_initialize_schema.down.sql @@ -1,5 +1,5 @@ -DROP INDEX idx_log_backup_id_and_dataset_name; +DROP INDEX idx_log_backup_id_and_dataset_id; DROP INDEX idx_log_backup_id; DROP INDEX idx_log_primary; DROP TABLE log; diff --git a/metadb/migrations/1_initialize_schema.up.sql b/metadb/migrations/1_initialize_schema.up.sql index 02a304e..5677d0f 100644 --- a/metadb/migrations/1_initialize_schema.up.sql +++ b/metadb/migrations/1_initialize_schema.up.sql @@ -6,13 +6,13 @@ CREATE TABLE log ( backup_id VARCHAR(128), backup_timestamp DATETIME, backup_host VARCHAR(128), - dataset_name VARCHAR(128), + dataset_id VARCHAR(128), dataset_source VARCHAR(128), atom_name VARCHAR(255), atom_full_path VARCHAR(255), atom_path VARCHAR(255) ); -CREATE UNIQUE INDEX idx_log_primary ON log (backup_id, dataset_name, atom_name); +CREATE UNIQUE INDEX idx_log_primary ON log (backup_id, dataset_id, atom_name); CREATE INDEX idx_log_backup_id ON log (backup_id); -CREATE INDEX idx_log_backup_id_and_dataset_name ON log (backup_id, dataset_name); +CREATE INDEX idx_log_backup_id_and_dataset_id ON log (backup_id, dataset_id); diff --git a/metadb/migrations/bindata.go b/metadb/migrations/bindata.go index 2323a2f..45ce2f5 100644 --- a/metadb/migrations/bindata.go +++ b/metadb/migrations/bindata.go @@ -46,7 +46,7 @@ func (fi bindataFileInfo) Sys() interface{} { } var __1_initialize_schemaDownSql = []byte(` -DROP INDEX idx_log_backup_id_and_dataset_name; +DROP INDEX idx_log_backup_id_and_dataset_id; DROP INDEX idx_log_backup_id; DROP INDEX idx_log_primary; DROP TABLE log; @@ -63,7 +63,7 @@ func 
_1_initialize_schemaDownSql() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "1_initialize_schema.down.sql", size: 123, mode: os.FileMode(420), modTime: time.Unix(1535012987, 0)} + info := bindataFileInfo{name: "1_initialize_schema.down.sql", size: 121, mode: os.FileMode(420), modTime: time.Unix(1560930730, 0)} a := &asset{bytes: bytes, info: info} return a, nil } @@ -76,16 +76,16 @@ CREATE TABLE log ( backup_id VARCHAR(128), backup_timestamp DATETIME, backup_host VARCHAR(128), - dataset_name VARCHAR(128), + dataset_id VARCHAR(128), dataset_source VARCHAR(128), atom_name VARCHAR(255), atom_full_path VARCHAR(255), atom_path VARCHAR(255) ); -CREATE UNIQUE INDEX idx_log_primary ON log (backup_id, dataset_name, atom_name); +CREATE UNIQUE INDEX idx_log_primary ON log (backup_id, dataset_id, atom_name); CREATE INDEX idx_log_backup_id ON log (backup_id); -CREATE INDEX idx_log_backup_id_and_dataset_name ON log (backup_id, dataset_name); +CREATE INDEX idx_log_backup_id_and_dataset_id ON log (backup_id, dataset_id); `) func _1_initialize_schemaUpSqlBytes() ([]byte, error) { @@ -98,7 +98,7 @@ func _1_initialize_schemaUpSql() (*asset, error) { return nil, err } - info := bindataFileInfo{name: "1_initialize_schema.up.sql", size: 576, mode: os.FileMode(420), modTime: time.Unix(1560809647, 0)} + info := bindataFileInfo{name: "1_initialize_schema.up.sql", size: 568, mode: os.FileMode(420), modTime: time.Unix(1560930732, 0)} a := &asset{bytes: bytes, info: info} return a, nil } diff --git a/metadb/server/service.go b/metadb/server/service.go index b2e47bc..30b53cd 100644 --- a/metadb/server/service.go +++ b/metadb/server/service.go @@ -17,7 +17,7 @@ type dbAtom struct { BackupID string BackupTimestamp time.Time BackupHost string - DatasetName string + DatasetID string DatasetSource string AtomName string AtomFullPath string @@ -31,13 +31,13 @@ func makeAtoms(backup tabacco.Backup, ds tabacco.Dataset) []dbAtom { // It is here that we 'materialize' the concept of Atom names // as paths, by concatenating source/dataset/atom and storing // it as the atom name. - path := filepath.Join(ds.Source, ds.Name, atom.Name) + path := filepath.Join(ds.Source, atom.Name) out = append(out, dbAtom{ BackupID: backup.ID, BackupTimestamp: backup.Timestamp, BackupHost: backup.Host, - DatasetName: ds.Name, + DatasetID: ds.ID, DatasetSource: ds.Source, AtomName: atom.Name, AtomPath: atom.Path, @@ -57,7 +57,7 @@ func (a *dbAtom) getBackup() *tabacco.Backup { func (a *dbAtom) getDataset() *tabacco.Dataset { return &tabacco.Dataset{ - Name: a.DatasetName, + ID: a.DatasetID, Source: a.DatasetSource, } } @@ -113,10 +113,10 @@ func groupByBackup(dbAtoms []*dbAtom) []*tabacco.Backup { tmp = make(map[string]*tabacco.Dataset) dsm[atom.BackupID] = tmp } - ds, ok := tmp[atom.DatasetName] + ds, ok := tmp[atom.DatasetID] if !ok { ds = atom.getDataset() - tmp[atom.DatasetName] = ds + tmp[atom.DatasetID] = ds b.Datasets = append(b.Datasets, ds) } @@ -161,7 +161,7 @@ var statements = map[string]string{ "insert_atom": ` INSERT INTO log ( backup_id, backup_timestamp, backup_host, - dataset_name, dataset_source, + dataset_id, dataset_source, atom_name, atom_path, atom_full_path ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ? 
@@ -183,7 +183,7 @@ func (s *Service) AddDataset(ctx context.Context, backup tabacco.Backup, ds taba dbAtom.BackupID, dbAtom.BackupTimestamp, dbAtom.BackupHost, - dbAtom.DatasetName, + dbAtom.DatasetID, dbAtom.DatasetSource, dbAtom.AtomName, dbAtom.AtomPath, @@ -223,7 +223,7 @@ func (s *Service) FindAtoms(ctx context.Context, req *tabacco.FindRequest) ([]*t q := fmt.Sprintf( `SELECT backup_id, backup_timestamp, backup_host, - dataset_name, dataset_source, + dataset_id, dataset_source, atom_name, atom_path FROM log WHERE %s ORDER BY backup_timestamp DESC`, @@ -240,7 +240,7 @@ func (s *Service) FindAtoms(ctx context.Context, req *tabacco.FindRequest) ([]*t var a dbAtom if err := rows.Scan( &a.BackupID, &a.BackupTimestamp, &a.BackupHost, - &a.DatasetName, &a.DatasetSource, + &a.DatasetID, &a.DatasetSource, &a.AtomName, &a.AtomPath, ); err != nil { log.Printf("bad row: %v", err) diff --git a/metadb/server/service_test.go b/metadb/server/service_test.go index 69fb972..27766c9 100644 --- a/metadb/server/service_test.go +++ b/metadb/server/service_test.go @@ -21,7 +21,6 @@ func addTestEntry(t *testing.T, svc *Service, backupID, host, dsName string) { Timestamp: time.Now(), }, tabacco.Dataset{ - Name: dsName, Source: "file", Atoms: []tabacco.Atom{ { diff --git a/repository_restic.go b/repository_restic.go index 0d1ad3c..0e71220 100644 --- a/repository_restic.go +++ b/repository_restic.go @@ -148,22 +148,24 @@ func (r *resticRepository) Prepare(ctx context.Context, rctx RuntimeContext, bac )) } +func resticBackupTags(backup *Backup, ds *Dataset) string { + return fmt.Sprintf("--tag dataset_id=%s --tag backup_id=%s", ds.ID, backup.ID) +} + func (r *resticRepository) BackupCmd(backup *Backup, ds *Dataset, sourcePaths []string) string { return fmt.Sprintf( - "%s backup --cleanup-cache --exclude-caches --one-file-system --tag %s --tag backup_id=%s %s", + "%s backup --cleanup-cache --exclude-caches --one-file-system %s %s", r.resticCmd(), - ds.Name, - backup.ID, + resticBackupTags(backup, ds), strings.Join(sourcePaths, " "), ) } func (r *resticRepository) getSnapshotID(ctx context.Context, rctx RuntimeContext, backup *Backup, ds *Dataset) (string, error) { data, err := rctx.Shell().Output(ctx, fmt.Sprintf( - "%s snapshots --json --tag backup_id=%s --tag %s", + "%s snapshots --json %s", r.resticCmd(), - backup.ID, - ds.Name, + resticBackupTags(backup, ds), )) if err != nil { return "", err @@ -200,17 +202,16 @@ func (r *resticRepository) RestoreCmd(ctx context.Context, rctx RuntimeContext, // A special path for stdin datasets that is likely to be unused by the // rest of the filesystem (the path namespace in Restic is global). 
func datasetStdinPath(ds *Dataset) string { - dsPath := filepath.Join(ds.Source, ds.Name) + dsPath := filepath.Join(ds.Source, ds.ID) return fmt.Sprintf("/STDIN_%s", strings.Replace(dsPath, "/", "_", -1)) } func (r *resticRepository) BackupStreamCmd(backup *Backup, ds *Dataset) string { fakePath := datasetStdinPath(ds) return fmt.Sprintf( - "%s backup --cleanup-cache --exclude-caches --tag %s --tag backup_id=%s --stdin --stdin-filename %s", + "%s backup --cleanup-cache --exclude-caches %s --stdin --stdin-filename %s", r.resticCmd(), - ds.Name, - backup.ID, + resticBackupTags(backup, ds), fakePath, ) } diff --git a/repository_restic_test.go b/repository_restic_test.go index f08b421..8f93f8c 100644 --- a/repository_restic_test.go +++ b/repository_restic_test.go @@ -155,7 +155,6 @@ func TestRestic(t *testing.T) { }, Datasets: []*DatasetSpec{ &DatasetSpec{ - Name: "files", Atoms: []Atom{ { Name: "f1", @@ -186,7 +185,6 @@ func TestRestic_Stream(t *testing.T) { Schedule: "@random_every 1h", Datasets: []*DatasetSpec{ &DatasetSpec{ - Name: "f1", Atoms: []Atom{ { Name: "f1", @@ -212,7 +210,6 @@ func TestRestic_Stream_Compress(t *testing.T) { Schedule: "@random_every 1h", Datasets: []*DatasetSpec{ &DatasetSpec{ - Name: "f1", Atoms: []Atom{ { Name: "f1", diff --git a/source.go b/source.go index efe8bda..9e7e7e7 100644 --- a/source.go +++ b/source.go @@ -14,7 +14,7 @@ import ( // DatasetSpec describes a dataset in the configuration. type DatasetSpec struct { - Name string `yaml:"name"` + //Name string `yaml:"name"` Atoms []Atom `yaml:"atoms"` AtomsCommand string `yaml:"atoms_command"` @@ -28,13 +28,13 @@ func (spec *DatasetSpec) Parse(ctx context.Context, src *SourceSpec) (*Dataset, if spec.AtomsCommand != "" { var cmdAtoms []Atom if err := runYAMLCommand(ctx, spec.AtomsCommand, &cmdAtoms); err != nil { - return nil, fmt.Errorf("source %s: dataset %s: error in atoms command: %v", src.Name, spec.Name, err) + return nil, fmt.Errorf("source %s: error in atoms command: %v", src.Name, err) } atoms = append(atoms, cmdAtoms...) } return &Dataset{ - Name: spec.Name, + ID: util.RandomID(), Source: src.Name, Atoms: atoms, }, nil @@ -42,9 +42,6 @@ func (spec *DatasetSpec) Parse(ctx context.Context, src *SourceSpec) (*Dataset, // Check syntactical validity of the DatasetSpec. 
func (spec *DatasetSpec) Check() error { - if spec.Name == "" { - return errors.New("dataset name is not set") - } if len(spec.Atoms) > 0 && spec.AtomsCommand != "" { return errors.New("can't specify both 'atoms' and 'atoms_command'") } @@ -102,7 +99,7 @@ func (spec *SourceSpec) Parse(ctx context.Context) ([]*Dataset, error) { for _, dspec := range dspecs { ds, err := dspec.Parse(ctx, spec) if err != nil { - return nil, fmt.Errorf("error parsing dataset %s: %v", dspec.Name, err) + return nil, fmt.Errorf("error parsing dataset: %v", err) } datasets = append(datasets, ds) } @@ -142,7 +139,7 @@ func (spec *SourceSpec) Check(handlers map[string]*HandlerSpec) error { merr := new(util.MultiError) for _, ds := range spec.Datasets { if err := ds.Check(); err != nil { - merr.Add(fmt.Errorf("dataset %s: %v", ds.Name, err)) + merr.Add(err) } } return merr.OrNil() diff --git a/testdata/sources/source.yml b/testdata/sources/source.yml index f08b9ad..f0c7875 100644 --- a/testdata/sources/source.yml +++ b/testdata/sources/source.yml @@ -4,5 +4,4 @@ schedule: "@random_every 2m" params: path: /usr/share/misc datasets: - - name: magic - atoms: [{name: magic}] + - atoms: [{name: magic}] diff --git a/types.go b/types.go index 5ed33a2..b3b4c27 100644 --- a/types.go +++ b/types.go @@ -77,7 +77,8 @@ type Atom struct { type Dataset struct { // Name of the dataset (path-like). Will be prepended to atom // paths. - Name string `json:"name"` + //Name string `json:"name"` + ID string `json:"id"` // Source is the name of the source that created this Dataset, // stored so that the restore knows what to do. -- GitLab From 92df87767e9e2a99663ac26458c7b500e44b71bc Mon Sep 17 00:00:00 2001 From: ale Date: Wed, 19 Jun 2019 09:15:07 +0100 Subject: [PATCH 09/12] Install liblz4-tool for tests --- .gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 01f6048..dafc98a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -12,6 +12,7 @@ test: image: "ai/test:go" script: - "./install_restic_for_tests.sh" + - "apt-get install -y liblz4-tool" - "go-test-runner ." except: - master -- GitLab From 434f164b20615b2bab493d08b420aaf152c5162f Mon Sep 17 00:00:00 2001 From: ale Date: Wed, 19 Jun 2019 10:44:09 +0100 Subject: [PATCH 10/12] Add a 'query' command to query the metadb --- cmd/tabacco/query.go | 89 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 cmd/tabacco/query.go diff --git a/cmd/tabacco/query.go b/cmd/tabacco/query.go new file mode 100644 index 0000000..30a1f29 --- /dev/null +++ b/cmd/tabacco/query.go @@ -0,0 +1,89 @@ +package main + +import ( + "context" + "encoding/json" + "errors" + "flag" + "log" + "os" + "time" + + "github.com/google/subcommands" + + "git.autistici.org/ai3/tools/tabacco" + mdbc "git.autistici.org/ai3/tools/tabacco/metadb/client" +) + +var rpcTimeout = 120 * time.Second + +type queryCommand struct { + configPath string + host string + numVersions int +} + +func (c *queryCommand) Name() string { return "query" } +func (c *queryCommand) Synopsis() string { return "query the backup metadata database" } +func (c *queryCommand) Usage() string { + return `query [] + Query the backup metadata database. 
+ +` +} + +func (c *queryCommand) SetFlags(f *flag.FlagSet) { + f.StringVar(&c.configPath, "config", "/etc/tabacco/agent.yml", "configuration `file`") + f.StringVar(&c.host, "host", "", "filter by host") + f.IntVar(&c.numVersions, "num-versions", 1, "return the most recent `N` versions") +} + +func (c *queryCommand) buildRequest(f *flag.FlagSet) (*tabacco.FindRequest, error) { + if f.NArg() != 1 { + return nil, errors.New("error: wrong number of arguments") + } + return &tabacco.FindRequest{ + Pattern: f.Arg(0), + Host: c.host, + NumVersions: c.numVersions, + }, nil +} + +func (c *queryCommand) Execute(ctx context.Context, f *flag.FlagSet, args ...interface{}) subcommands.ExitStatus { + req, err := c.buildRequest(f) + if err != nil { + log.Printf("error in request: %v", err) + return subcommands.ExitUsageError + } + + // Parse configuration and connect to the metadata store. + config, err := tabacco.ReadConfig(c.configPath) + if err != nil { + log.Printf("error reading config: %v", err) + return subcommands.ExitFailure + } + store, err := mdbc.New(config.MetadataStoreBackend) + if err != nil { + log.Printf("error in metadata client config: %v", err) + return subcommands.ExitFailure + } + + // Make the RPC. + rctx, cancel := context.WithTimeout(ctx, rpcTimeout) + defer cancel() + result, err := store.FindAtoms(rctx, req) + + if err != nil { + log.Printf("FindAtoms() error: %v", err) + return subcommands.ExitFailure + } + + data, _ := json.MarshalIndent(result, "", " ") + os.Stdout.Write(data) + + return subcommands.ExitSuccess +} + +func init() { + subcommands.Register(&queryCommand{}, "") +} -- GitLab From bb99d51782c6c3edfa767c3536b4b69e63b2e269 Mon Sep 17 00:00:00 2001 From: ale Date: Wed, 19 Jun 2019 11:46:40 +0100 Subject: [PATCH 11/12] Simplify internal interfaces Clear separation between configuration and its parsed results, which are now maintained in the RuntimeContext to kept them consistent within backup/restore jobs. --- agent.go | 3 +- config.go | 150 +++++++++++++++++--------------------- config_test.go | 20 ++--- handler_file.go | 8 +- handler_pipe.go | 8 +- jobs/job.go | 2 +- manager.go | 27 +++---- manager_test.go | 4 +- repository_restic_test.go | 4 +- types.go | 4 +- 10 files changed, 106 insertions(+), 124 deletions(-) diff --git a/agent.go b/agent.go index 21e2ba0..a121fa9 100644 --- a/agent.go +++ b/agent.go @@ -35,7 +35,8 @@ func NewAgent(ctx context.Context, configMgr *ConfigManager, ms MetadataStore) ( case <-stopCh: return case <-notifyCh: - schedule, err := makeSchedule(ctx, mgr, configMgr.getSourceSpecs(), configMgr.getSeed()) + config := configMgr.current() + schedule, err := makeSchedule(ctx, mgr, config.SourceSpecs(), config.Seed()) if err != nil { log.Printf("error updating scheduler: %v", err) } diff --git a/config.go b/config.go index a424e76..bcbbe7c 100644 --- a/config.go +++ b/config.go @@ -23,7 +23,7 @@ var defaultSeedFile = "/var/tmp/.tabacco_scheduler_seed" // holds it all together. type Config struct { Hostname string `yaml:"hostname"` - Queue jobs.QueueSpec `yaml:"queue_config"` + Queue *jobs.QueueSpec `yaml:"queue_config"` Repository RepositorySpec `yaml:"repository"` DryRun bool `yaml:"dry_run"` DefaultNiceLevel int `yaml:"default_nice_level"` @@ -37,30 +37,54 @@ type Config struct { } // RuntimeContext provides access to runtime objects whose lifetime is -// ultimately tied to the configuration. +// ultimately tied to the configuration. 
Configuration can change +// during the lifetime of the process, but we want backup jobs to have +// a consistent view of the configuration while they execute, so +// access to the current version of the configuration is controlled to +// the ConfigManager. type RuntimeContext interface { Shell() *Shell + Repo() Repository + QueueSpec() *jobs.QueueSpec + Seed() int64 + WorkDir() string + SourceSpecs() []*SourceSpec + FindSource(string) *SourceSpec + HandlerSpec(string) *HandlerSpec Close() } -// The set of objects that are created from a Config. Can change, so -// its access is controlled by the ConfigManager. However it stays -// fixed during a running backup. -// -// This is an implementation of RuntimeContext. -type runtimeAssets struct { - handlerMap map[string]*HandlerSpec - repo Repository - seed int64 - shell *Shell +// The set of objects that are created from a Config and that the main +// code cares about. +type parsedConfig struct { + handlerMap map[string]*HandlerSpec + sourceSpecs []*SourceSpec + sourceSpecsByName map[string]*SourceSpec + queue *jobs.QueueSpec + + repo Repository + seed int64 + shell *Shell + workDir string } -func (a *runtimeAssets) Close() { +func (a *parsedConfig) Close() { a.repo.Close() // nolint } -func (a *runtimeAssets) Shell() *Shell { - return a.shell +func (a *parsedConfig) Shell() *Shell { return a.shell } +func (a *parsedConfig) Repo() Repository { return a.repo } +func (a *parsedConfig) QueueSpec() *jobs.QueueSpec { return a.queue } +func (a *parsedConfig) Seed() int64 { return a.seed } +func (a *parsedConfig) WorkDir() string { return a.workDir } +func (a *parsedConfig) SourceSpecs() []*SourceSpec { return a.sourceSpecs } + +func (a *parsedConfig) HandlerSpec(name string) *HandlerSpec { + return a.handlerMap[name] +} + +func (a *parsedConfig) FindSource(name string) *SourceSpec { + return a.sourceSpecsByName[name] } func buildHandlerMap(specs []*HandlerSpec) map[string]*HandlerSpec { @@ -77,7 +101,7 @@ func buildHandlerMap(specs []*HandlerSpec) map[string]*HandlerSpec { return m } -func (c *Config) parse() (*runtimeAssets, error) { +func (c *Config) parse() (*parsedConfig, error) { shell := NewShell(c.DryRun) shell.SetNiceLevel(c.DefaultNiceLevel) shell.SetIOClass(c.DefaultIOClass) @@ -98,21 +122,20 @@ func (c *Config) parse() (*runtimeAssets, error) { // Sources that fail the check are removed from the // SourceSpecs array. We also check that sources have unique // names. - tmp := make(map[string]struct{}) + srcMap := make(map[string]*SourceSpec) var srcs []*SourceSpec for _, spec := range c.SourceSpecs { if err := spec.Check(handlerMap); err != nil { merr.Add(fmt.Errorf("source %s: %v", spec.Name, err)) continue } - if _, ok := tmp[spec.Name]; ok { + if _, ok := srcMap[spec.Name]; ok { merr.Add(fmt.Errorf("duplicated source %s", spec.Name)) continue } - tmp[spec.Name] = struct{}{} + srcMap[spec.Name] = spec srcs = append(srcs, spec) } - c.SourceSpecs = srcs // Read (or create) the seed file. seedFile := defaultSeedFile @@ -121,11 +144,15 @@ func (c *Config) parse() (*runtimeAssets, error) { } seed := mustGetSeed(seedFile) - return &runtimeAssets{ - shell: shell, - repo: repo, - handlerMap: handlerMap, - seed: seed, + return &parsedConfig{ + handlerMap: handlerMap, + sourceSpecs: srcs, + sourceSpecsByName: srcMap, + queue: c.Queue, + shell: shell, + repo: repo, + seed: seed, + workDir: c.WorkDir, }, merr.OrNil() } @@ -251,8 +278,7 @@ func foreachYAMLFile(dir string, f func(string) error) error { // unregister). 
type ConfigManager struct { mx sync.Mutex - config *Config - assets *runtimeAssets + parsed *parsedConfig // Listeners are notified on every reload. notifyCh chan struct{} @@ -285,8 +311,8 @@ func NewConfigManager(config *Config) (*ConfigManager, error) { // Reload the configuration (at least, the parts of it that can be // dynamically reloaded). func (m *ConfigManager) Reload(config *Config) error { - assets, err := config.parse() - if assets == nil { + parsed, err := config.parse() + if parsed == nil { return err } else if err != nil { log.Printf("warning: errors in configuration: %v", err) @@ -296,13 +322,12 @@ func (m *ConfigManager) Reload(config *Config) error { // goroutine, that does not hold the lock). m.mx.Lock() defer m.mx.Unlock() - if m.assets != nil { - m.assets.Close() // nolint + if m.parsed != nil { + m.parsed.Close() // nolint } - log.Printf("loaded new config: %d handlers, %d sources", len(assets.handlerMap), len(config.SourceSpecs)) - m.assets = assets - m.config = config + log.Printf("loaded new config: %d handlers, %d sources", len(parsed.handlerMap), len(parsed.sourceSpecs)) + m.parsed = parsed m.notifyCh <- struct{}{} return nil } @@ -311,8 +336,8 @@ func (m *ConfigManager) Reload(config *Config) error { func (m *ConfigManager) Close() { m.mx.Lock() close(m.notifyCh) - if m.assets != nil { - m.assets.Close() + if m.parsed != nil { + m.parsed.Close() } m.mx.Unlock() } @@ -330,57 +355,16 @@ func (m *ConfigManager) Notify() <-chan struct{} { return ch } -// Captures current runtime assets into a RuntimeContext -func (m *ConfigManager) newRuntimeContext() RuntimeContext { - return m.assets -} - -func (m *ConfigManager) getHandlerSpec(name string) (*HandlerSpec, bool) { - m.mx.Lock() - defer m.mx.Unlock() - h, ok := m.assets.handlerMap[name] - return h, ok -} - -func (m *ConfigManager) getRepository() Repository { - m.mx.Lock() - defer m.mx.Unlock() - return m.assets.repo -} - -func (m *ConfigManager) getQueueSpec() jobs.QueueSpec { - m.mx.Lock() - defer m.mx.Unlock() - return m.config.Queue -} - -func (m *ConfigManager) getSourceSpecs() []*SourceSpec { - m.mx.Lock() - defer m.mx.Unlock() - return m.config.SourceSpecs -} - -func (m *ConfigManager) findSource(name string) *SourceSpec { - m.mx.Lock() - defer m.mx.Unlock() - for _, src := range m.config.SourceSpecs { - if src.Name == name { - return src - } - } - return nil -} - -func (m *ConfigManager) getSeed() int64 { - m.mx.Lock() - defer m.mx.Unlock() - return m.assets.seed +// NewRuntimeContext returns a new RuntimeContext, capturing current +// configuration and runtime assets. +func (m *ConfigManager) NewRuntimeContext() RuntimeContext { + return m.current() } -func (m *ConfigManager) getWorkDir() string { +func (m *ConfigManager) current() *parsedConfig { m.mx.Lock() defer m.mx.Unlock() - return m.config.WorkDir + return m.parsed } func mustGetSeed(path string) int64 { diff --git a/config_test.go b/config_test.go index d516c7f..80933e9 100644 --- a/config_test.go +++ b/config_test.go @@ -36,8 +36,8 @@ func TestConfigManager(t *testing.T) { defer mgr.Close() // Test one of the accessor methods. 
- if s := mgr.getSourceSpecs(); len(s) != 1 { - t.Fatalf("getSourceSpecs() bad result: %+v", s) + if s := mgr.current().SourceSpecs(); len(s) != 1 { + t.Fatalf("current().SourceSpecs() bad result: %+v", s) } // Test the Notify() mechanism by checking that it triggers @@ -68,7 +68,7 @@ func TestConfig_Parse(t *testing.T) { type testdata struct { config *Config expectedOK bool - checkFn func(*runtimeAssets, []*Dataset) error + checkFn func([]*Dataset) error } tdd := []testdata{ // The following tests cover a few ways to generate @@ -166,29 +166,29 @@ func TestConfig_Parse(t *testing.T) { "password": "hello", } - ra, err := td.config.parse() + parsed, err := td.config.parse() if err != nil && td.expectedOK { t.Errorf("unexpected error for config %+v: %v", td.config, err) } else if err == nil && !td.expectedOK { t.Errorf("missing error for config %+v", td.config) } else { - datasets, err := parseAllSources(ra, td.config.SourceSpecs) + datasets, err := parseAllSources(parsed.SourceSpecs()) if err != nil { t.Errorf("failed to parse sources %+v: %v", td.config.SourceSpecs, err) } if td.checkFn != nil { - if err := td.checkFn(ra, datasets); err != nil { + if err := td.checkFn(datasets); err != nil { t.Errorf("check failed for config %+v: %v", td.config, err) } } } - if ra != nil { - ra.Close() + if parsed != nil { + parsed.Close() } } } -func parseAllSources(ra *runtimeAssets, specs []*SourceSpec) ([]*Dataset, error) { +func parseAllSources(specs []*SourceSpec) ([]*Dataset, error) { var out []*Dataset for _, spec := range specs { ds, err := spec.Parse(context.Background()) @@ -200,7 +200,7 @@ func parseAllSources(ra *runtimeAssets, specs []*SourceSpec) ([]*Dataset, error) return out, nil } -func checkTwoUserAccountsAtoms(ra *runtimeAssets, datasets []*Dataset) error { +func checkTwoUserAccountsAtoms(datasets []*Dataset) error { var numAtoms int for _, ds := range datasets { if ds.ID == "" { diff --git a/handler_file.go b/handler_file.go index 4fe2073..1da9487 100644 --- a/handler_file.go +++ b/handler_file.go @@ -30,14 +30,14 @@ func atomPath(a Atom, root string) string { return filepath.Join(root, a.Name) } -func (h *fileHandler) BackupJob(rctx RuntimeContext, repo Repository, backup *Backup, ds *Dataset) jobs.Job { +func (h *fileHandler) BackupJob(rctx RuntimeContext, backup *Backup, ds *Dataset) jobs.Job { // Build the list of filesystem paths to pass to the // Repository.Backup method. var paths []string for _, a := range ds.Atoms { paths = append(paths, atomPath(a, h.path)) } - cmd := repo.BackupCmd(backup, ds, paths) + cmd := rctx.Repo().BackupCmd(backup, ds, paths) // Now pass those paths to the Backup method. return jobs.JobFunc(func(ctx context.Context) error { @@ -45,7 +45,7 @@ func (h *fileHandler) BackupJob(rctx RuntimeContext, repo Repository, backup *Ba }) } -func (h *fileHandler) RestoreJob(rctx RuntimeContext, repo Repository, backup *Backup, ds *Dataset, target string) jobs.Job { +func (h *fileHandler) RestoreJob(rctx RuntimeContext, backup *Backup, ds *Dataset, target string) jobs.Job { // Build the list of filesystem paths to pass to the // Repository.Backup method. var paths []string @@ -55,7 +55,7 @@ func (h *fileHandler) RestoreJob(rctx RuntimeContext, repo Repository, backup *B // Call the repo Restore method. 
return jobs.JobFunc(func(ctx context.Context) error { - cmd, err := repo.RestoreCmd(ctx, rctx, backup, ds, paths, target) + cmd, err := rctx.Repo().RestoreCmd(ctx, rctx, backup, ds, paths, target) if err != nil { return err } diff --git a/handler_pipe.go b/handler_pipe.go index 5f5c174..8e06e23 100644 --- a/handler_pipe.go +++ b/handler_pipe.go @@ -60,21 +60,21 @@ func newPipeHandler(name string, params Params) (Handler, error) { return h, nil } -func (h *pipeHandler) BackupJob(rctx RuntimeContext, repo Repository, backup *Backup, ds *Dataset) jobs.Job { +func (h *pipeHandler) BackupJob(rctx RuntimeContext, backup *Backup, ds *Dataset) jobs.Job { cmd := fmt.Sprintf( "(%s)%s | %s", expandVars(h.backupCmd, backup, ds), h.compressSuffix(), - repo.BackupStreamCmd(backup, ds), + rctx.Repo().BackupStreamCmd(backup, ds), ) return jobs.JobFunc(func(ctx context.Context) error { return rctx.Shell().Run(ctx, cmd) }) } -func (h *pipeHandler) RestoreJob(rctx RuntimeContext, repo Repository, backup *Backup, ds *Dataset, target string) jobs.Job { +func (h *pipeHandler) RestoreJob(rctx RuntimeContext, backup *Backup, ds *Dataset, target string) jobs.Job { return jobs.JobFunc(func(ctx context.Context) error { - restoreCmd, err := repo.RestoreStreamCmd(ctx, rctx, backup, ds, getWorkDir(ctx)) + restoreCmd, err := rctx.Repo().RestoreStreamCmd(ctx, rctx, backup, ds, getWorkDir(ctx)) if err != nil { return err } diff --git a/jobs/job.go b/jobs/job.go index c825292..42571ee 100644 --- a/jobs/job.go +++ b/jobs/job.go @@ -191,7 +191,7 @@ type QueueSpec struct { // NewQueueManager returns a new QueueManager with the provided // configuration. -func NewQueueManager(spec QueueSpec) *QueueManager { +func NewQueueManager(spec *QueueSpec) *QueueManager { q := make(map[string]chan struct{}) for name, n := range spec.Workers { q[name] = make(chan struct{}, n) diff --git a/manager.go b/manager.go index 41fbe24..7d89dc2 100644 --- a/manager.go +++ b/manager.go @@ -27,7 +27,7 @@ type tabaccoManager struct { func NewManager(ctx context.Context, configMgr *ConfigManager, ms MetadataStore) (Manager, error) { // If we can't create a workdirManager, it probably means we // don't have permissions to the WorkDir, which is bad. - wm, err := newWorkdirManager(configMgr.getWorkDir()) + wm, err := newWorkdirManager(configMgr.current().WorkDir()) if err != nil { return nil, err } @@ -35,7 +35,7 @@ func NewManager(ctx context.Context, configMgr *ConfigManager, ms MetadataStore) // Note: the queue configuration won't be reloaded. return &tabaccoManager{ ExclusiveLockManager: jobs.NewExclusiveLockManager(), - QueueManager: jobs.NewQueueManager(configMgr.getQueueSpec()), + QueueManager: jobs.NewQueueManager(configMgr.current().QueueSpec()), StateManager: jobs.NewStateManager(), workdirManager: wm, @@ -82,9 +82,8 @@ func (m *tabaccoManager) withMetadata(j jobs.Job, backup *Backup, ds *Dataset) j // operation: we need to wait for it to complete to avoid running the // backup tasks too soon. 
func (m *tabaccoManager) prepareBackupJob(rctx RuntimeContext, backup *Backup) jobs.Job { - repo := m.configMgr.getRepository() return jobs.JobFunc(func(ctx context.Context) error { - return repo.Init(ctx, rctx) + return rctx.Repo().Init(ctx, rctx) //log.Printf("preparing backup %s", backup.ID) //return repo.Prepare(ctx, backup) }) @@ -115,8 +114,8 @@ func (m *tabaccoManager) makeBackupJob(ctx context.Context, rctx RuntimeContext, if err != nil { return nil, err } - hspec, ok := m.configMgr.getHandlerSpec(src.Handler) - if !ok { + hspec := rctx.HandlerSpec(src.Handler) + if hspec == nil { return nil, fmt.Errorf("unknown handler '%s'", src.Handler) } h, err := hspec.Parse(src) @@ -129,10 +128,9 @@ func (m *tabaccoManager) makeBackupJob(ctx context.Context, rctx RuntimeContext, // // TODO: get the timeout from the SourceSpec. var backupJobs []jobs.Job - repo := m.configMgr.getRepository() for _, ds := range dsl { backupJobs = append(backupJobs, m.withMetadata( - h.BackupJob(rctx, repo, backup, ds), + h.BackupJob(rctx, backup, ds), backup, ds, )) @@ -177,7 +175,7 @@ func (m *tabaccoManager) BackupJob(ctx context.Context, src *SourceSpec) (*Backu b := newBackup("") // Create a RuntimeContext. - rctx := m.configMgr.newRuntimeContext() + rctx := m.configMgr.NewRuntimeContext() j, err := m.makeBackupJob(ctx, rctx, b, src) return b, j, err @@ -195,8 +193,8 @@ func (m *tabaccoManager) Backup(ctx context.Context, src *SourceSpec) (*Backup, func (m *tabaccoManager) makeRestoreJob(rctx RuntimeContext, backup *Backup, src *SourceSpec, dsl []*Dataset, target string) (jobs.Job, error) { // Just need the Handler. - hspec, ok := m.configMgr.getHandlerSpec(src.Handler) - if !ok { + hspec := rctx.HandlerSpec(src.Handler) + if hspec == nil { return nil, fmt.Errorf("unknown handler '%s'", src.Handler) } h, err := hspec.Parse(src) @@ -208,11 +206,10 @@ func (m *tabaccoManager) makeRestoreJob(rctx RuntimeContext, backup *Backup, src // doBackupDataset() that binds together the context, backup, // ds and target via the closure. var restoreJobs []jobs.Job - repo := m.configMgr.getRepository() for _, ds := range dsl { restoreJobs = append( restoreJobs, - h.RestoreJob(rctx, repo, backup, ds, target), + h.RestoreJob(rctx, backup, ds, target), ) } @@ -267,7 +264,7 @@ func (m *tabaccoManager) RestoreJob(ctx context.Context, req *FindRequest, targe } // Create a RuntimeContext. - rctx := m.configMgr.newRuntimeContext() + rctx := m.configMgr.NewRuntimeContext() var restoreJobs []jobs.Job merr := new(util.MultiError) @@ -275,7 +272,7 @@ func (m *tabaccoManager) RestoreJob(ctx context.Context, req *FindRequest, targe // Group the datasets by source, find the source and create the restore jobs. 
for srcName, dsl := range groupDatasetsBySource(b.Datasets) { - src := m.configMgr.findSource(srcName) + src := rctx.FindSource(srcName) if src == nil { merr.Add(fmt.Errorf("unknown source '%s'", srcName)) continue diff --git a/manager_test.go b/manager_test.go index 3150540..8e4ac8b 100644 --- a/manager_test.go +++ b/manager_test.go @@ -202,7 +202,7 @@ func TestManager_Backup(t *testing.T) { DatasetsCommand: "echo '[{name: users, atoms: [{name: user1}, {name: user2}]}]'", }, } - queueSpec := jobs.QueueSpec{ + queueSpec := &jobs.QueueSpec{ Workers: map[string]int{"backup": 2}, } @@ -225,7 +225,7 @@ func TestManager_Backup(t *testing.T) { } defer m.Close() - for _, src := range configMgr.getSourceSpecs() { + for _, src := range configMgr.current().SourceSpecs() { backup, err := m.Backup(context.TODO(), src) if err != nil { t.Fatal(err) diff --git a/repository_restic_test.go b/repository_restic_test.go index 8f93f8c..6b34991 100644 --- a/repository_restic_test.go +++ b/repository_restic_test.go @@ -67,7 +67,7 @@ func runResticTest(t *testing.T, tmpdir string, source *SourceSpec, restorePatte }, }, } - queueSpec := jobs.QueueSpec{ + queueSpec := &jobs.QueueSpec{ Workers: map[string]int{"backup": 2, "restore": 1}, } sourceSpecs := []*SourceSpec{source} @@ -90,7 +90,7 @@ func runResticTest(t *testing.T, tmpdir string, source *SourceSpec, restorePatte } defer m.Close() - backup, err := m.Backup(context.TODO(), configMgr.getSourceSpecs()[0]) + backup, err := m.Backup(context.TODO(), configMgr.current().SourceSpecs()[0]) if err != nil { t.Fatal(err) } diff --git a/types.go b/types.go index b3b4c27..12767c6 100644 --- a/types.go +++ b/types.go @@ -118,8 +118,8 @@ type MetadataStore interface { // Handler can backup and restore a specific class of datasets. type Handler interface { - BackupJob(RuntimeContext, Repository, *Backup, *Dataset) jobs.Job - RestoreJob(RuntimeContext, Repository, *Backup, *Dataset, string) jobs.Job + BackupJob(RuntimeContext, *Backup, *Dataset) jobs.Job + RestoreJob(RuntimeContext, *Backup, *Dataset, string) jobs.Job } // Repository is the interface to a remote repository. -- GitLab From 25f279d0250586fe7fb2380a87659a4daa4281e8 Mon Sep 17 00:00:00 2001 From: ale Date: Wed, 19 Jun 2019 18:59:55 +0100 Subject: [PATCH 12/12] Set the 'pipefail' option on shell commands This way we can detect failure of dump / restore commands when used with the 'pipe' handler (when we're using shell pipes for composition). --- shell.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/shell.go b/shell.go index 183c7a8..f26c3e8 100644 --- a/shell.go +++ b/shell.go @@ -78,7 +78,10 @@ func (s *Shell) command(ctx context.Context, arg string) *exec.Cmd { if s.dryRun { args = []string{"/bin/echo", arg} } else { - args = []string{"/bin/sh", "-c", arg} + // The pipefail option is necessary for us to detect + // when the first command in a pipeline fails, but we + // need bash for that. + args = []string{"/bin/bash", "-o", "pipefail", "-c", arg} } if s.niceLevel != 0 { -- GitLab
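
The RuntimeContext change in PATCH 11/12 boils down to handing each backup or restore job an immutable snapshot of the parsed configuration, captured once when the job starts, so a concurrent reload cannot change the job's view mid-flight. The sketch below is a minimal, self-contained illustration of that capture-once pattern; it is not code from the patch, and the names (snapshot, manager, sources) are placeholders rather than the real parsedConfig/ConfigManager types.

    package main

    import (
    	"fmt"
    	"sync"
    )

    // snapshot stands in for the patch's parsedConfig: an immutable view
    // of the configuration, built once per reload and never mutated.
    type snapshot struct {
    	sources []string
    }

    // manager stands in for ConfigManager: it owns the current snapshot
    // and swaps it atomically, under a mutex, on every reload.
    type manager struct {
    	mx      sync.Mutex
    	current *snapshot
    }

    func (m *manager) reload(sources []string) {
    	m.mx.Lock()
    	defer m.mx.Unlock()
    	m.current = &snapshot{sources: sources}
    }

    // newRuntimeContext mirrors ConfigManager.NewRuntimeContext: a job
    // grabs the snapshot once and keeps using it even if a reload
    // happens while the job is still running.
    func (m *manager) newRuntimeContext() *snapshot {
    	m.mx.Lock()
    	defer m.mx.Unlock()
    	return m.current
    }

    func main() {
    	m := &manager{}
    	m.reload([]string{"source1"})

    	rctx := m.newRuntimeContext()            // captured at job start
    	m.reload([]string{"source1", "source2"}) // config changes mid-job

    	fmt.Println(rctx.sources)                  // [source1]
    	fmt.Println(m.newRuntimeContext().sources) // [source1 source2]
    }

The real ConfigManager additionally closes the previous parsedConfig when it is replaced; the sketch only shows why a job that calls NewRuntimeContext() at the start sees one consistent set of sources, handlers and repository for its whole lifetime.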
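
PATCH 12/12 switches command execution from /bin/sh to /bin/bash -o pipefail because the pipe handler composes dump and repository commands with a shell pipe, and without pipefail the pipeline's exit status is that of the last command only, so a failing dump command goes unnoticed. The following small Go program (not part of the patch, purely illustrative) demonstrates the difference using a pipeline whose first stage fails:

    package main

    import (
    	"fmt"
    	"os/exec"
    )

    // run executes cmd through the given shell prefix and returns the
    // resulting error (nil means exit status 0).
    func run(shell []string, cmd string) error {
    	args := append(shell[1:], cmd)
    	return exec.Command(shell[0], args...).Run()
    }

    func main() {
    	// "false | cat": the producer fails, the consumer succeeds.
    	pipeline := "false | cat"

    	// Plain sh reports the exit status of the last command: success.
    	fmt.Println("sh -c:           ", run([]string{"/bin/sh", "-c"}, pipeline))

    	// bash -o pipefail propagates the failure of any pipeline stage.
    	fmt.Println("bash -o pipefail:", run([]string{"/bin/bash", "-o", "pipefail", "-c"}, pipeline))
    }

On a typical system the first call returns <nil> even though false failed, while the bash -o pipefail variant returns "exit status 1", which is exactly the failure signal the pipe handler needs in order to mark a backup or restore as failed.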