diff --git a/files.go b/files.go index 4cfae94d0a329265346f79ee66336d2f8339b331..6cd3f537563bb66f0ea5ab73b4cacbb07042a444 100644 --- a/files.go +++ b/files.go @@ -1,61 +1,105 @@ package liber import ( + "fmt" "os" "path/filepath" "strings" + + "git.autistici.org/ale/liber/util" ) +// FileStorage exposes a read-only filesystem hierarchy as a root for +// relative paths (so that you can move archives around while the +// database is still valid). Calls will still accept absolute paths +// for backwards compatibility. type FileStorage struct { - Root string - Nesting int + Root string } -func NewFileStorage(root string, nesting int) *FileStorage { +func NewFileStorage(root string) *FileStorage { return &FileStorage{ - Root: root, - Nesting: nesting, + Root: root, } } -// Path of the file corresponding to the given key, relative to the -// root directory. -func (s *FileStorage) Path(key string) string { - var parts []string - for i := 0; i < s.Nesting; i++ { - if i >= len(key) { - break - } - parts = append(parts, key[i:i+1]) +// Return the absolute path of a file, given its relative path. +func (s *FileStorage) Abs(path string) string { + if strings.HasPrefix(path, "/") { + return path } - parts = append(parts, key) - return filepath.Join(parts...) + return filepath.Join(s.Root, path) +} + +// Return the relative path of a file with respect to the storage +// root. +func (s *FileStorage) Rel(abspath string) (string, error) { + return filepath.Rel(s.Root, abspath) } -// Create a new file for the given key. +// Create a new file for the given key. Directories containing the +// output file will be automatically created. func (s *FileStorage) Create(path string) (*os.File, error) { - p := filepath.Join(s.Root, path) - if err := os.MkdirAll(filepath.Dir(p), 0700); err != nil { + path = s.Abs(path) + if err := os.MkdirAll(filepath.Dir(path), 0700); err != nil { return nil, err } - return os.Create(p) + return os.Create(path) +} + +// Exists returns true if the specified file exists. +func (s *FileStorage) Exists(path string) bool { + _, err := os.Stat(s.Abs(path)) + return err == nil } // Open a file. func (s *FileStorage) Open(path string) (*os.File, error) { - if strings.HasPrefix(path, "/") { - return os.Open(path) - } - return os.Open(filepath.Join(s.Root, path)) + return os.Open(s.Abs(path)) } // Rename oldpath to newpath. func (s *FileStorage) Rename(oldpath, newpath string) error { - if !strings.HasPrefix(oldpath, "/") { - oldpath = filepath.Join(s.Root, oldpath) + return os.Rename(s.Abs(oldpath), s.Abs(newpath)) +} + +func (s *FileStorage) Walk(w *util.Walker, fn filepath.WalkFunc) { + w.Walk(s.Root, func(path string, info os.FileInfo, ferr error) error { + if ferr != nil { + return nil + } + relpath, err := s.Rel(path) + if err != nil { + return fmt.Errorf("%s is outside %s (?)", path, s.Root) + } + return fn(relpath, info, nil) + }) +} + +// RWFileStorage adds a read-write API on top of a FileStorage, based +// on unique keys and directory sharding. +type RWFileStorage struct { + *FileStorage + Nesting int +} + +func NewRWFileStorage(root string, nesting int) *RWFileStorage { + return &RWFileStorage{ + FileStorage: NewFileStorage(root), + Nesting: nesting, } - if !strings.HasPrefix(newpath, "/") { - newpath = filepath.Join(s.Root, newpath) +} + +// Path of the file corresponding to the given key, relative to the +// root directory. +func (s *RWFileStorage) Path(key string) string { + var parts []string + for i := 0; i < s.Nesting; i++ { + if i >= len(key) { + break + } + parts = append(parts, key[i:i+1]) } - return os.Rename(oldpath, newpath) + parts = append(parts, key) + return filepath.Join(parts...) } diff --git a/sync.go b/sync.go index 647586da9db93fda19c5bff6ce7feb1bb66e935a..1787187febe4b95cb2ee1e7e2e712382967a95e0 100644 --- a/sync.go +++ b/sync.go @@ -247,7 +247,7 @@ func (db *Database) Sync(remote SyncClient) error { type syncServer struct { db *Database - storage *FileStorage + storage *RWFileStorage } func (l *syncServer) handleDiffRequest(w http.ResponseWriter, req *http.Request) { @@ -355,7 +355,7 @@ func (l *syncServer) handleSyncUpload(w http.ResponseWriter, req *http.Request) w.WriteHeader(200) } -func savePart(req *http.Request, fieldname string, storage *FileStorage, outname string) (int64, *multipart.FileHeader, error) { +func savePart(req *http.Request, fieldname string, storage *RWFileStorage, outname string) (int64, *multipart.FileHeader, error) { f, hdr, err := req.FormFile(fieldname) if err != nil { return 0, nil, err diff --git a/sync_test.go b/sync_test.go index aeb3cc7018b34b5d8ce0f36c80b51422896d5aaa..eebcc0cc68a7e65a8e6c8f36fdbf7b1113e93657 100644 --- a/sync_test.go +++ b/sync_test.go @@ -11,7 +11,7 @@ import ( ) func newTestSyncHttpServer(db *Database, updir string) *httptest.Server { - localsrv := &syncServer{db, &FileStorage{Root: updir, Nesting: 2}} + localsrv := &syncServer{db, NewRWFileStorage(updir, 2)} mux := http.NewServeMux() mux.HandleFunc("/api/sync/upload", localsrv.handleSyncUpload) diff --git a/update.go b/update.go index 46035cb294d231f8b16daf1cfdbff9193ca16171..87e0d62e900fc95e2f7dfd7bb1612ff101a497a3 100644 --- a/update.go +++ b/update.go @@ -19,12 +19,13 @@ type MetadataChooserFunc func(string, []*Metadata) *Metadata type fileData struct { source int path string + relpath string filetype string id BookId info os.FileInfo } -func (f fileData) toLiberFile(haserr bool) *File { +func (f fileData) toLiberFile(storage *FileStorage, haserr bool) (*File, error) { return &File{ Path: f.path, FileType: f.filetype, @@ -32,7 +33,7 @@ func (f fileData) toLiberFile(haserr bool) *File { Size: f.info.Size(), Id: f.id, Error: haserr, - } + }, nil } type fileAndBook struct { @@ -40,8 +41,14 @@ type fileAndBook struct { b *Book } -func dbFileScanner(db *Database, fileCh chan fileData) { - for iter := db.Scan(FileBucket); iter.Valid(); iter.Next() { +type updateContext struct { + db *Database + storage *FileStorage + chooser MetadataChooserFunc +} + +func (uc *updateContext) dbFileScanner(fileCh chan fileData) { + for iter := uc.db.Scan(FileBucket); iter.Valid(); iter.Next() { var f File if err := iter.Value(&f); err != nil { continue @@ -54,12 +61,8 @@ func dbFileScanner(db *Database, fileCh chan fileData) { } } -func localFileScanner(db *Database, basedir string, fileCh chan fileData) { - util.NewDefaultWalker().Walk(basedir, func(path string, info os.FileInfo, err error) error { - if err != nil { - return nil - } - +func (uc *updateContext) localFileScanner(basedir string, fileCh chan fileData) { + uc.storage.Walk(util.NewDefaultWalker(), func(path string, info os.FileInfo, err error) error { fileCh <- fileData{ source: SourceFS, path: path, @@ -69,7 +72,7 @@ func localFileScanner(db *Database, basedir string, fileCh chan fileData) { }) } -func differ(db *Database, basedir string) chan fileData { +func (uc *updateContext) differ(basedir string) chan fileData { fileCh := make(chan fileData, 100) outCh := make(chan fileData, 100) var wg sync.WaitGroup @@ -77,11 +80,11 @@ func differ(db *Database, basedir string) chan fileData { // Start two sources in parallel and send their output to fileCh. go func() { - localFileScanner(db, basedir, fileCh) + uc.localFileScanner(basedir, fileCh) wg.Done() }() go func() { - dbFileScanner(db, fileCh) + uc.dbFileScanner(fileCh) wg.Done() }() // Once they are done, close the channel. @@ -110,8 +113,8 @@ func differ(db *Database, basedir string) chan fileData { } for path, value := range tmp { if value == SourceDB { - log.Printf("removing file %s", path) - db.DeleteFile(path) + log.Printf("file %s has been removed", path) + uc.db.DeleteFile(path) } } close(outCh) @@ -119,15 +122,15 @@ func differ(db *Database, basedir string) chan fileData { return outCh } -func extractor(db *Database, chooser MetadataChooserFunc, fileCh chan fileData, outCh chan fileAndBook) { +func (uc *updateContext) extractor(fileCh chan fileData, outCh chan fileAndBook) { for f := range fileCh { - if oldfile, err := db.GetFile(f.path); err == nil { + if oldfile, err := uc.db.GetFile(f.path); err == nil { if !oldfile.HasChanged(f.info) { continue } f.id = oldfile.Id } - book, filetype, err := parseMeta(f, chooser) + book, filetype, err := uc.parseMeta(f) if err == nil { f.filetype = filetype outCh <- fileAndBook{f: f, b: book} @@ -136,22 +139,26 @@ func extractor(db *Database, chooser MetadataChooserFunc, fileCh chan fileData, // Parse errors are permanent. log.Printf("Could not parse %s: %v", f.path, err) - file := f.toLiberFile(true) - if err := db.PutFile(file); err != nil { + file, err := f.toLiberFile(uc.storage, true) + if err != nil { + log.Printf("Error saving file %s: %v", file.Path, err) + continue + } + if err := uc.db.PutFile(file); err != nil { log.Printf("Error saving file %s to db: %v", file.Path, err) } } } -func parseMeta(f fileData, chooser MetadataChooserFunc) (*Book, string, error) { +func (uc *updateContext) parseMeta(f fileData) (*Book, string, error) { // Attempt direct metadata extraction. - book, filetype, err := Parse(f.path) + book, filetype, err := Parse(uc.storage.Abs(f.path)) if err != nil { return nil, "", err } // Check if a Calibre OPF file exists. - if opfmeta, err := opfOpen(opfMetadataPath(f.path)); err == nil { + if opfmeta, err := opfOpen(opfMetadataPath(uc.storage.Abs(f.path))); err == nil { book.Metadata.Merge(opfmeta) } else { // No local metadata, use Google Books to retrieve @@ -162,8 +169,8 @@ func parseMeta(f fileData, chooser MetadataChooserFunc) (*Book, string, error) { if len(candidates) == 1 { log.Printf("found Google Books match: %s", candidates[0].String()) book.Metadata.Merge(candidates[0]) - } else { - if userchoice := chooser(f.path, candidates); userchoice != nil { + } else if uc.chooser != nil { + if userchoice := uc.chooser(f.path, candidates); userchoice != nil { book.Metadata.Merge(userchoice) } } @@ -179,11 +186,11 @@ func parseMeta(f fileData, chooser MetadataChooserFunc) (*Book, string, error) { // Try to find a cover image. Look on the local filesystem // first, otherwise check Google Books. localCoverPath := opfCoverPath(f.path) - if _, err := os.Stat(localCoverPath); err == nil { + if uc.storage.Exists(localCoverPath) { book.CoverPath = localCoverPath } else if imageData, err := GetGoogleBooksCover(book.Metadata); err == nil { imageFileName := f.path + ".cover.png" - if imgf, err := os.Create(imageFileName); err != nil { + if imgf, err := os.Create(uc.storage.Abs(imageFileName)); err != nil { log.Printf("Could not save cover image for %d: %v", book.Id, err) } else { imgf.Write(imageData) @@ -195,7 +202,7 @@ func parseMeta(f fileData, chooser MetadataChooserFunc) (*Book, string, error) { return book, filetype, nil } -func dbwriter(db *Database, ch chan fileAndBook) { +func (uc *updateContext) dbwriter(ch chan fileAndBook) { for pair := range ch { saveBook := true @@ -203,7 +210,7 @@ func dbwriter(db *Database, ch chan fileAndBook) { // existing book. if pair.f.id == 0 { log.Printf("potential new book: %#v", pair.b.Metadata) - if match, err := db.Find(pair.b.Metadata.Uniques()); err == nil { + if match, err := uc.db.Find(pair.b.Metadata.Uniques()); err == nil { log.Printf("%s matches existing book %d", pair.f.path, match.Id) // Ignore new metadata. pair.b = match @@ -219,15 +226,19 @@ func dbwriter(db *Database, ch chan fileAndBook) { } if saveBook { - if err := db.PutBook(pair.b); err != nil { + if err := uc.db.PutBook(pair.b); err != nil { log.Printf("Error saving book %d to db: %v", pair.b.Id, err) continue } log.Printf("%s -> %d", pair.f.path, pair.b.Id) } - file := pair.f.toLiberFile(false) - if err := db.PutFile(file); err != nil { + file, err := pair.f.toLiberFile(uc.storage, false) + if err != nil { + log.Printf("Error saving file %s: %v", pair.f.path, err) + continue + } + if err := uc.db.PutFile(file); err != nil { log.Printf("Error saving file %s to db: %v", file.Path, err) } } @@ -236,13 +247,19 @@ func dbwriter(db *Database, ch chan fileAndBook) { func (db *Database) Update(dir string, chooser MetadataChooserFunc) { // Parallelize metadata extraction, serialize database updates // (so that index-based de-duplication works). + uc := &updateContext{ + db: db, + chooser: chooser, + storage: NewFileStorage(dir), + } + var wg sync.WaitGroup - ch := differ(db, dir) + ch := uc.differ(dir) pch := make(chan fileAndBook) for i := 0; i < 10; i++ { wg.Add(1) go func() { - extractor(db, chooser, ch, pch) + uc.extractor(ch, pch) wg.Done() }() } @@ -250,5 +267,5 @@ func (db *Database) Update(dir string, chooser MetadataChooserFunc) { wg.Wait() close(pch) }() - dbwriter(db, pch) + uc.dbwriter(pch) } diff --git a/update_test.go b/update_test.go index 59b6c0dc97a72552d18d13aa2f89d377556dabc7..7b6211be9a7d78969365526794d41d6cf9f2f165 100644 --- a/update_test.go +++ b/update_test.go @@ -4,6 +4,7 @@ import ( "io/ioutil" "os" "path/filepath" + "strings" "testing" "git.autistici.org/ale/liber/util" @@ -45,14 +46,43 @@ func TestDatabase_Update(t *testing.T) { if _, err := db.GetBook(td.refbookid); err == nil { t.Errorf("%s: test book still in database", tag) } + // Test OPF ebook should have been found by Update. if result, err := db.Search("isbn:9781939293015", 0, 1); err != nil || result.NumResults != 1 { t.Errorf("%s: new book not found in database", tag) } } + // The second update should do nothing. db.Update(tmpdir, chooser) testDb("first update") db.Update(tmpdir, chooser) testDb("second update") + + // Check that the test file is there. + if _, err := db.GetFile("book/Test Ebook.pdf"); err != nil { + t.Errorf("test file is not in the database") + } + + // Files should have relative paths. + for i := db.Scan(FileBucket); i.Valid(); i.Next() { + var f File + if err := i.Value(&f); err != nil { + t.Fatal(err) + } + if strings.HasPrefix(f.Path, "/") { + t.Errorf("file has absolute path: %v", f.Path) + } + } + + // Book cover images should have relative paths. + for i := db.Scan(BookBucket); i.Valid(); i.Next() { + var b Book + if err := i.Value(&b); err != nil { + t.Fatal(err) + } + if b.CoverPath != "" && strings.HasPrefix(b.CoverPath, "/") { + t.Errorf("file has absolute path: %v", b.CoverPath) + } + } } diff --git a/web.go b/web.go index 8cac14a7ba3c86d8f695421854d2183aa027c0af..cd1f33f4afccbaf479f26d09a70e6f03cca5666b 100644 --- a/web.go +++ b/web.go @@ -27,8 +27,8 @@ var ( type uiServer struct { db *Database - storage *FileStorage - cache *FileStorage + storage *RWFileStorage + cache *RWFileStorage } type pagination struct { @@ -336,7 +336,7 @@ func handleOpenSearchXml(w http.ResponseWriter, req *http.Request) { render("opensearch_xml.html", w, &ctx) } -func NewHttpServer(db *Database, storage, cache *FileStorage, addr string) *http.Server { +func NewHttpServer(db *Database, storage, cache *RWFileStorage, addr string) *http.Server { var err error tpl, err = template.New("liber").Funcs(template.FuncMap{ "join": strings.Join, diff --git a/web_test.go b/web_test.go index aafefe04fe3bf93f9a3491aa7ca84204ecadcb24..54c9d0530905e367e058becfe9c714fb740a1722 100644 --- a/web_test.go +++ b/web_test.go @@ -29,7 +29,7 @@ func newTestHttpServer(t *testing.T) (*testHttpServer, *httptest.Server) { ts.tmpdir, _ = ioutil.TempDir("", "tmp-storage-") ts.td, ts.db = newTestDatabase(t) - tempStorage := NewFileStorage(ts.tmpdir, 2) + tempStorage := NewRWFileStorage(ts.tmpdir, 2) server := NewHttpServer(ts.db, tempStorage, tempStorage, ":1234") return &ts, httptest.NewServer(server.Handler) }