Commit e26a7a72 authored by ale

Implement the 'refine' command

parent aba06b81
Pipeline #820 passed with stages
in 1 minute and 16 seconds
......@@ -37,6 +37,7 @@ func main() {
subcommands.Register(&dumpCommand{}, "Maintenance")
subcommands.Register(&restoreCommand{}, "Maintenance")
subcommands.Register(&reindexCommand{}, "Maintenance")
subcommands.Register(&refineCommand{}, "Maintenance")
subcommands.Register(&listCommand{}, "Maintenance")
log.SetFlags(0)
......
......@@ -5,21 +5,27 @@ import (
"flag"
"log"
"os"
"path/filepath"
"github.com/google/subcommands"
"git.autistici.org/ale/liber/util"
)
type refineCommand struct{}
// refineCommand holds the state of the 'refine' subcommand.
type refineCommand struct {
// noninteractive is bound to the -noninteractive flag
// ("disable user prompts") by SetFlags.
noninteractive bool
}
// SetFlags registers the flags understood by the refine command on f.
func (c *refineCommand) SetFlags(f *flag.FlagSet) {
// -noninteractive defaults to false; its value is stored in c.noninteractive.
f.BoolVar(&c.noninteractive, "noninteractive", false, "disable user prompts")
}
func (c *refineCommand) Name() string { return "update" }
func (c *refineCommand) Synopsis() string { return "Add books to the local db" }
func (c *refineCommand) SetFlags(f *flag.FlagSet) {}
// Name returns the name of the subcommand, "refine".
func (c *refineCommand) Name() string { return "refine" }
// Synopsis returns a one-line description of the subcommand.
func (c *refineCommand) Synopsis() string { return "Improve metadata" }
func (c *refineCommand) Usage() string {
return `update [<OPTIONS>]
Add books to the local database.
return `refine [<OPTIONS>]
Improve metadata of books already in the database.
Expects a list of book IDs on standard input.
`
}
......@@ -33,15 +39,10 @@ func (c *refineCommand) Execute(ctx context.Context, f *flag.FlagSet, _ ...inter
db := openDB()
defer db.Close()
// Redirect logging to dbdir/refine.log.
logf, err := os.OpenFile(filepath.Join(*databaseDir, "refine.log"), os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0644)
if err == nil {
defer logf.Close()
log.SetOutput(logf)
log.SetFlags(log.Ldate | log.Ltime)
if err := db.WithBookIDs(os.Stdin, db.RefineFunc(util.ExpandTilde(*bookDir), promptUser(c.noninteractive))); err != nil {
log.Printf("error: %v", err)
return subcommands.ExitFailure
}
db.Refine(util.ExpandTilde(*bookDir))
return subcommands.ExitSuccess
}
......@@ -3,16 +3,24 @@ package liber
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"log"
"os"
)
func (db *Database) Refine(dir string) error {
//storage := NewFileStorage(dir)
return db.onAllBooks(func(book *Book) error {
func (db *Database) RefineFunc(dir string, chooser MetadataChooserFunc) func(*Book) error {
storage := NewFileStorage(dir)
refiners := defaultMetadataRefiners
return func(book *Book) error {
if err := refineMetadata(book, "", storage, refiners, chooser); err == nil {
log.Printf("%s: updated metadata", book.Id)
return db.PutBook(book)
}
return nil
})
}
}
// ListBooks writes IDs of books that match any of a series of
......@@ -51,3 +59,68 @@ func (db *Database) WithBookIDs(r io.Reader, f func(book *Book) error) error {
}
return scanner.Err()
}
// findCoverImage tries to obtain a cover image for book when it does
// not have one yet (book.CoverPath is empty).
//
// Each refiner is asked in turn via GetBookCover; refiners that return
// an error are skipped. The first image obtained is written to
// storage.Abs(path + ".cover.png") and book.CoverPath is set to
// path + ".cover.png". path is only used to build that file name.
//
// The returned error is non-nil only when the image file could not be
// created or fully written. Not finding a cover image is not an error.
func findCoverImage(book *Book, path string, refiners []MetadataRefiner, storage *FileStorage) error {
	if book.CoverPath != "" {
		return nil
	}

	localCoverPath := path + ".cover.png"
	for _, refiner := range refiners {
		coverData, err := refiner.GetBookCover(book.Metadata)
		if err != nil {
			continue
		}
		if err := writeCoverFile(storage.Abs(localCoverPath), coverData); err != nil {
			return err
		}
		// Record the cover only after the data is safely on disk.
		book.CoverPath = localCoverPath
		return nil
	}

	// Not finding a cover image is not an error.
	return nil
}

// writeCoverFile creates (or truncates) the file called name and writes
// data to it, reporting create, write and close errors.
func writeCoverFile(name string, data []byte) error {
	f, err := os.Create(name)
	if err != nil {
		return err
	}
	if _, err := f.Write(data); err != nil {
		// Best-effort cleanup of the partial file; the write error is
		// the one worth reporting.
		f.Close()
		os.Remove(name)
		return err
	}
	// Close can report a delayed write failure, so its error matters.
	return f.Close()
}
// refineMetadata tries to improve the metadata of book using the given
// refiners, then looks for a cover image.
//
// The Book may not have an ID assigned yet. path is passed to the
// chooser as a UI hint and to findCoverImage; storage is only used by
// findCoverImage. A nil chooser is allowed: ambiguous lookups are then
// left unresolved.
//
// It returns an error ("insufficient metadata") when the metadata does
// not pass Sufficient() after the lookups. Cover image errors are
// logged and never returned.
func refineMetadata(book *Book, path string, storage *FileStorage, refiners []MetadataRefiner, chooser MetadataChooserFunc) error {
	meta := book.Metadata

	// Remote lookups are only worth running on incomplete metadata.
	// Every refiner is consulted and its result merged into meta.
	if !meta.Complete() {
		for _, refiner := range refiners {
			candidates, err := refiner.Lookup(meta)
			if err != nil || len(candidates) == 0 {
				// Nothing usable from this refiner, try the next one.
				continue
			}

			switch {
			case len(candidates) == 1:
				// A single match is accepted without asking.
				log.Printf("found match from %s: %s", refiner.Name(), candidates[0].String())
				meta.Merge(candidates[0])
			case chooser != nil:
				// Several matches: let the chooser pick one, or none.
				if userchoice := chooser(path, candidates); userchoice != nil {
					meta.Merge(userchoice)
				}
			}
		}
	}

	// Without usable metadata there is no point in looking for a
	// cover image.
	if !meta.Sufficient() {
		return errors.New("insufficient metadata")
	}

	// Errors finding/saving cover images are not fatal.
	if err := findCoverImage(book, path, refiners, storage); err != nil {
		log.Printf("Error saving cover image: %v", err)
	}
	return nil
}
......@@ -19,7 +19,6 @@ type MetadataChooserFunc func(string, []*Metadata) *Metadata
type fileData struct {
source int
path string
relpath string
filetype string
id BookId
info os.FileInfo
......@@ -188,14 +187,10 @@ func (uc *updateContext) parseMeta(f fileData) (*Book, string, error) {
return nil, "", errors.New("no metadata could be identified")
}
// If the book cover couldn't be found locally, prepare to
// download it. It's possible that we've already done this, so
// check in the storage first (TODO: this check isn't useful,
// if the cover exists it should have been emitted by the
// fileProvider above).
localCoverPath := f.path + ".cover.png"
if coverPath == "" && uc.storage.Exists(localCoverPath) {
coverPath = localCoverPath
// Create a Book with no ID (yet).
book := &Book{
Metadata: meta,
CoverPath: coverPath,
}
// Only run remote checks if the metadata isn't complete.
......@@ -217,20 +212,7 @@ func (uc *updateContext) parseMeta(f fileData) (*Book, string, error) {
}
}
}
if coverPath == "" {
if coverData, err := refiner.GetBookCover(meta); err == nil {
if imgf, err := os.Create(uc.storage.Abs(localCoverPath)); err != nil {
log.Printf("Error saving cover image: %v", err)
} else {
imgf.Write(coverData)
imgf.Close()
coverPath = localCoverPath
}
}
}
}
}
// Check if the book metadata looks ok. If not, don't even
......@@ -239,10 +221,9 @@ func (uc *updateContext) parseMeta(f fileData) (*Book, string, error) {
return nil, "", errors.New("insufficient metadata")
}
// Create a Book with no ID (yet).
book := &Book{
Metadata: meta,
CoverPath: coverPath,
// Errors finding/saving cover images are not fatal.
if err := findCoverImage(book, f.path, uc.refiners, uc.storage); err != nil {
log.Printf("Error saving cover image: %v", err)
}
return book, filetype, nil
......@@ -290,37 +271,39 @@ func (uc *updateContext) dbwriter(ch chan fileAndBook) {
}
}
func DefaultMetadataProviders() []MetadataProvider {
return []MetadataProvider{
var (
defaultMetadataProviders = []MetadataProvider{
// Calibre/OPF must be first, so we don't attempt to
// parse the file itself if we have external metadata.
&opfProvider{},
&fileProvider{},
}
}
defaultMetadataRefiners = []MetadataRefiner{
// Check Google Books when the metadata is not
// sufficient to fully describe the book.
&googleBooksRefiner{},
//&openLibraryRefiner{},
}
)
// numUpdateMetadataWorkers is the number of extractor goroutines
// started by Database.Update.
const numUpdateMetadataWorkers = 10
func (db *Database) Update(dir string, chooser MetadataChooserFunc) {
// Parallelize metadata extraction, serialize database updates
// (so that index-based de-duplication works).
uc := &updateContext{
db: db,
chooser: chooser,
storage: NewFileStorage(dir),
// Calibre/OPF must be first, so we don't attempt to
// parse the file itself.
providers: DefaultMetadataProviders(),
// Check Google Books when the metadata is not
// sufficient to fully describe the book.
refiners: []MetadataRefiner{
//&openLibraryRefiner{},
&googleBooksRefiner{},
},
db: db,
chooser: chooser,
storage: NewFileStorage(dir),
providers: defaultMetadataProviders,
refiners: defaultMetadataRefiners,
}
var wg sync.WaitGroup
ch := uc.differ(dir)
pch := make(chan fileAndBook)
for i := 0; i < 10; i++ {
for i := 0; i < numUpdateMetadataWorkers; i++ {
wg.Add(1)
go func() {
uc.extractor(ch, pch)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment