diff --git a/googlebooks.go b/googlebooks.go index f52cbbb6489ed1a642a31ffb4a44058f6e61d750..a3116c3c241c0644202cf01b38f88127cdaf4ffc 100644 --- a/googlebooks.go +++ b/googlebooks.go @@ -14,6 +14,12 @@ import ( "time" ) +type googleBooksRefiner struct{} + +func (r *googleBooksRefiner) Name() string { + return "gbooks" +} + type atomResultEntry struct { GoogleId string `xml:"id"` Title string `xml:"http://purl.org/dc/terms title"` @@ -142,7 +148,7 @@ func googleBooksGet(uri string) (*http.Response, error) { return nil, errors.New("deadline exceeded") } -func LookupGoogleBooks(m *Metadata) ([]*Metadata, error) { +func (r *googleBooksRefiner) Lookup(m *Metadata) ([]*Metadata, error) { qstr := googleBooksQuery(m) if qstr == "" { return nil, errors.New("insufficient metadata for query") @@ -173,7 +179,7 @@ var imageUnavailableMD5 = [16]byte{ 0x0d, 0xe4, 0x38, 0x3e, 0xba, 0xd0, 0xad, 0xad, 0x5e, 0xeb, 0x89, 0x75, 0xcd, 0x79, 0x66, 0x57, } -func GetGoogleBooksCover(m *Metadata) ([]byte, error) { +func (r *googleBooksRefiner) GetBookCover(m *Metadata) ([]byte, error) { gbid := getGoogleBooksId(m) if gbid == "" { return nil, errors.New("no ID") diff --git a/metadata.go b/metadata.go index b90d6aa9d564d68a6d810546abefa51a0af0648c..cd01280acd2cfd2c88cd8fdf737497569796ac56 100644 --- a/metadata.go +++ b/metadata.go @@ -11,6 +11,22 @@ import ( "github.com/meskio/epubgo" ) +// A metadata provider generates metadata from the local filesystem. +type MetadataProvider interface { + Name() string + Lookup(*FileStorage, string, string) (*Metadata, error) + GetBookCover(*FileStorage, string) (string, error) +} + +// A metadata refiner improves on existing metadata and may provide +// more than one result to choose from. It usually involves talking to +// a remote service. +type MetadataRefiner interface { + Name() string + Lookup(*Metadata) ([]*Metadata, error) + GetBookCover(*Metadata) ([]byte, error) +} + type MetadataSource struct { Name string ID string @@ -30,10 +46,18 @@ type Metadata struct { } // Sufficient returns true if the object contains enough information. +// If this check does not pass, the book won't be added to the database. func (m *Metadata) Sufficient() bool { return m.Title != "" } +// Complete returns true if we're satisfied with the quality of the +// information about this book. If this returns true, remote checks +// will be skipped. +func (m *Metadata) Complete() bool { + return (m.Title != "" && len(m.Creator) > 0 && len(m.ISBN) > 0) +} + // Uniques returns the list of possible unique tokens for this book. func (m *Metadata) Uniques() []string { var out []string @@ -220,23 +244,45 @@ func parseAnything(filename string) (*Metadata, error) { }, nil } -func Parse(filename string) (*Book, string, error) { +type fileProvider struct{} + +func (p *fileProvider) Lookup(storage *FileStorage, path, filetype string) (*Metadata, error) { + path = storage.Abs(path) var m *Metadata var err error - ext := strings.ToLower(filepath.Ext(filename)) - switch ext { + switch filetype { case ".epub": - m, err = parseEpub(filename) + m, err = parseEpub(path) case ".mobi": - m, err = parseMobi(filename) + m, err = parseMobi(path) case ".pdf": - m, err = parseAnything(filename) + m, err = parseAnything(path) default: - return nil, "", errors.New("unsupported file format") + return nil, errors.New("unsupported file format") } if err != nil { - return nil, "", err + return nil, err } - return &Book{Metadata: m}, ext, nil + return m, nil +} + +func (p *fileProvider) GetBookCover(storage *FileStorage, path string) (string, error) { + coverPath := path + ".cover.png" + if storage.Exists(coverPath) { + return coverPath, nil + } + return "", nil +} + +func (p *fileProvider) Name() string { + return "file" +} + +func GetFileType(path string) (string, error) { + filetype := strings.ToLower(filepath.Ext(path)) + if filetype != ".epub" && filetype != ".mobi" && filetype != ".pdf" { + return "", errors.New("unsupported file format") + } + return filetype, nil } diff --git a/opf.go b/opf.go index e57e84957174e2aa8e12d5b88226c4c8a2917ed7..a2a46f3d00250fa628486cc3e9f1bfe9593d45dc 100644 --- a/opf.go +++ b/opf.go @@ -82,3 +82,28 @@ func opfMetadataPath(epubPath string) string { func opfCoverPath(epubPath string) string { return filepath.Join(filepath.Dir(epubPath), "cover.jpg") } + +type opfProvider struct{} + +func (p *opfProvider) Lookup(storage *FileStorage, path, filetype string) (*Metadata, error) { + if !storage.Exists(opfMetadataPath(path)) { + return nil, nil + } + m, err := opfOpen(opfMetadataPath(storage.Abs(path))) + if err != nil { + return nil, err + } + return m, err +} + +func (p *opfProvider) GetBookCover(storage *FileStorage, path string) (string, error) { + coverPath := opfCoverPath(path) + if storage.Exists(coverPath) { + return coverPath, nil + } + return "", nil +} + +func (p *opfProvider) Name() string { + return "opf" +} diff --git a/update.go b/update.go index 87e0d62e900fc95e2f7dfd7bb1612ff101a497a3..d16f7d782b1aff1effbfb6b685c90134621d58af 100644 --- a/update.go +++ b/update.go @@ -42,9 +42,11 @@ type fileAndBook struct { } type updateContext struct { - db *Database - storage *FileStorage - chooser MetadataChooserFunc + db *Database + storage *FileStorage + chooser MetadataChooserFunc + providers []MetadataProvider + refiners []MetadataRefiner } func (uc *updateContext) dbFileScanner(fileCh chan fileData) { @@ -151,52 +153,94 @@ func (uc *updateContext) extractor(fileCh chan fileData, outCh chan fileAndBook) } func (uc *updateContext) parseMeta(f fileData) (*Book, string, error) { - // Attempt direct metadata extraction. - book, filetype, err := Parse(uc.storage.Abs(f.path)) + filetype, err := GetFileType(f.path) if err != nil { return nil, "", err } - // Check if a Calibre OPF file exists. - if opfmeta, err := opfOpen(opfMetadataPath(uc.storage.Abs(f.path))); err == nil { - book.Metadata.Merge(opfmeta) - } else { - // No local metadata, use Google Books to retrieve - // more information on the book. Ask the user to - // choose in case there are multiple results. - candidates, err := LookupGoogleBooks(book.Metadata) - if err == nil && len(candidates) > 0 { - if len(candidates) == 1 { - log.Printf("found Google Books match: %s", candidates[0].String()) - book.Metadata.Merge(candidates[0]) - } else if uc.chooser != nil { - if userchoice := uc.chooser(f.path, candidates); userchoice != nil { - book.Metadata.Merge(userchoice) + // Attempt metadata extraction from the providers. The first + // match returned stops the iteration. At the same time, look + // for a cover image until one is found. + var meta *Metadata + var coverPath string + + for _, provider := range uc.providers { + if meta == nil { + meta, err = provider.Lookup(uc.storage, f.path, filetype) + if err != nil { + log.Printf("%s: %s: could not parse: %v", f.path, provider.Name(), err) + } else if meta != nil { + log.Printf("%s: identified by: %s", f.path, provider.Name()) + } + } + if coverPath == "" { + coverPath, err = provider.GetBookCover(uc.storage, f.path) + if err != nil { + log.Printf("%s: %s: could not fetch cover image at %s", f.path, provider.Name(), err) + } else if coverPath != "" { + log.Printf("%s: cover image found by: %s", f.path, provider.Name()) + } + } + } + if meta == nil { + return nil, "", errors.New("no metadata could be identified") + } + + // If the book cover couldn't be found locally, prepare to + // download it. It's possible that we've already done this, so + // check in the storage first (TODO: this check isn't useful, + // if the cover exists it should have been emitted by the + // fileProvider above). + localCoverPath := f.path + ".cover.png" + if coverPath == "" && uc.storage.Exists(localCoverPath) { + coverPath = localCoverPath + } + + // Only run remote checks if the metadata isn't complete. + if !meta.Complete() { + + // Integrate metadata using the refiners. We check them all, + // and merge their results into the metadata object. The user + // is prompted if a choice is necessary. Search for a book + // cover only until one is found. + for _, refiner := range uc.refiners { + candidates, err := refiner.Lookup(meta) + if err == nil && len(candidates) > 0 { + if len(candidates) == 1 { + log.Printf("found match from %s: %s", refiner.Name(), candidates[0].String()) + meta.Merge(candidates[0]) + } else if uc.chooser != nil { + if userchoice := uc.chooser(f.path, candidates); userchoice != nil { + meta.Merge(userchoice) + } + } + } + + if coverPath == "" { + if coverData, err := refiner.GetBookCover(meta); err == nil { + if imgf, err := os.Create(uc.storage.Abs(localCoverPath)); err != nil { + log.Printf("Error saving cover image: %v", err) + } else { + imgf.Write(coverData) + imgf.Close() + coverPath = localCoverPath + } } } } + } // Check if the book metadata looks ok. If not, don't even // bother looking for a cover image. - if !book.Metadata.Sufficient() { + if !meta.Sufficient() { return nil, "", errors.New("insufficient metadata") } - // Try to find a cover image. Look on the local filesystem - // first, otherwise check Google Books. - localCoverPath := opfCoverPath(f.path) - if uc.storage.Exists(localCoverPath) { - book.CoverPath = localCoverPath - } else if imageData, err := GetGoogleBooksCover(book.Metadata); err == nil { - imageFileName := f.path + ".cover.png" - if imgf, err := os.Create(uc.storage.Abs(imageFileName)); err != nil { - log.Printf("Could not save cover image for %d: %v", book.Id, err) - } else { - imgf.Write(imageData) - imgf.Close() - book.CoverPath = imageFileName - } + // Create a Book with no ID (yet). + book := &Book{ + Metadata: meta, + CoverPath: coverPath, } return book, filetype, nil @@ -251,6 +295,19 @@ func (db *Database) Update(dir string, chooser MetadataChooserFunc) { db: db, chooser: chooser, storage: NewFileStorage(dir), + + // Calibre/OPF must be first, so we don't attempt to + // parse the file itself. + providers: []MetadataProvider{ + &opfProvider{}, + &fileProvider{}, + }, + + // Check Google Books when the metadata is not + // sufficient to fully describe the book. + refiners: []MetadataRefiner{ + &googleBooksRefiner{}, + }, } var wg sync.WaitGroup diff --git a/update_test.go b/update_test.go index 7b6211be9a7d78969365526794d41d6cf9f2f165..dfc2f33824acbc5ee822b83a5a2b4247f3ab5ecd 100644 --- a/update_test.go +++ b/update_test.go @@ -20,6 +20,30 @@ func createTestFs(fs map[string]string) string { return base } +func checkDbPathIntegrity(t *testing.T, db *Database) { + // Files should have relative paths. + for i := db.Scan(FileBucket); i.Valid(); i.Next() { + var f File + if err := i.Value(&f); err != nil { + t.Fatal(err) + } + if strings.HasPrefix(f.Path, "/") { + t.Errorf("file has absolute path: %v", f.Path) + } + } + + // Book cover images should have relative paths. + for i := db.Scan(BookBucket); i.Valid(); i.Next() { + var b Book + if err := i.Value(&b); err != nil { + t.Fatal(err) + } + if b.CoverPath != "" && strings.HasPrefix(b.CoverPath, "/") { + t.Errorf("file has absolute path: %v", b.CoverPath) + } + } +} + func TestDatabase_Update(t *testing.T) { util.WalkerDefaultMinSize = 0 @@ -59,30 +83,26 @@ func TestDatabase_Update(t *testing.T) { db.Update(tmpdir, chooser) testDb("second update") + if chooserCalled { + t.Errorf("chooser function was called") + } + // Check that the test file is there. if _, err := db.GetFile("book/Test Ebook.pdf"); err != nil { t.Errorf("test file is not in the database") } - // Files should have relative paths. - for i := db.Scan(FileBucket); i.Valid(); i.Next() { - var f File - if err := i.Value(&f); err != nil { - t.Fatal(err) - } - if strings.HasPrefix(f.Path, "/") { - t.Errorf("file has absolute path: %v", f.Path) - } - } + checkDbPathIntegrity(t, db) +} - // Book cover images should have relative paths. - for i := db.Scan(BookBucket); i.Valid(); i.Next() { - var b Book - if err := i.Value(&b); err != nil { - t.Fatal(err) - } - if b.CoverPath != "" && strings.HasPrefix(b.CoverPath, "/") { - t.Errorf("file has absolute path: %v", b.CoverPath) - } - } +func TestDatabase_UpdateEpub(t *testing.T) { + util.WalkerDefaultMinSize = 0 + + td, db := newTestDatabase(t) + defer td.Close() + + // Read the test epub from testdata/. + db.Update("testdata", nil) + + checkDbPathIntegrity(t, db) }