package liber

import (
	"errors"
	"log"
	"os"
	"sync"

	"git.autistici.org/ale/liber/util"
)

// Bit flags recording where a fileData entry was discovered. They are
// OR-ed together by differ to detect paths seen by both scanners.
const (
	// SourceDB marks an entry read from the database.
	SourceDB = 1 << iota
	// SourceFS marks an entry found by walking the filesystem.
	SourceFS
)

// MetadataChooserFunc is invoked when more than one Google Books
// candidate is found for a file. It receives the file path and the
// candidates, and returns the one to merge into the book metadata,
// or nil to merge none of them.
type MetadataChooserFunc func(string, []*Metadata) *Metadata

// fileData describes a file flowing through the update pipeline.
type fileData struct {
	source   int
	path     string
	filetype string
	id       BookId
	info     os.FileInfo
}

// toLiberFile converts f to a File record, with Error set to haserr.
// It dereferences f.info, which is only populated for entries
// produced by localFileScanner, so it must not be called on entries
// coming from dbFileScanner.
func (f fileData) toLiberFile(haserr bool) *File {
	return &File{
		Path:     f.path,
		FileType: f.filetype,
		Mtime:    f.info.ModTime(),
		Size:     f.info.Size(),
		Id:       f.id,
		Error:    haserr,
	}
}

// fileAndBook pairs a scanned file with the book parsed from it.
type fileAndBook struct {
	f fileData
	b *Book
}

// dbFileScanner sends one SourceDB entry to fileCh for every record
// in the file bucket of db. Records that cannot be decoded are
// logged and skipped.
func dbFileScanner(db *Database, fileCh chan fileData) {
	for iter := db.Scan(FileBucket); iter.Valid(); iter.Next() {
		var f File
		if err := iter.Value(&f); err != nil {
			log.Printf("Error decoding file entry from db: %v", err)
			continue
		}
		fileCh <- fileData{
			source: SourceDB,
			path:   f.Path,
			id:     f.Id,
		}
	}
}

// localFileScanner walks basedir and sends one SourceFS entry to
// fileCh for every path reported by the walker. Paths for which the
// walker reports an error are skipped so that the walk continues.
func localFileScanner(db *Database, basedir string, fileCh chan fileData) {
	util.NewDefaultWalker().Walk(basedir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			// Best effort: ignore unreadable entries.
			return nil
		}
		fileCh <- fileData{
			source: SourceFS,
			path:   path,
			info:   info,
		}
		return nil
	})
}

// differ scans the database and the filesystem below basedir in
// parallel. Every filesystem entry is forwarded on the returned
// channel; once both scans are complete, files that were seen only
// in the database are deleted from it and the channel is closed.
func differ(db *Database, basedir string) chan fileData {
	fileCh := make(chan fileData, 100)
	outCh := make(chan fileData, 100)

	var wg sync.WaitGroup
	wg.Add(2)

	// Start two sources in parallel and send their output to fileCh.
	go func() {
		localFileScanner(db, basedir, fileCh)
		wg.Done()
	}()
	go func() {
		dbFileScanner(db, fileCh)
		wg.Done()
	}()

	// Once they are done, close the channel.
	go func() {
		wg.Wait()
		close(fileCh)
	}()

	go func() {
		// Merge the two sources and keep track of files that
		// only appear in the database but not on the
		// filesystem, so we can remove them at the end.
		// All entries with source == SourceFS will be sent to
		// the output channel in any case.
		allSources := SourceDB | SourceFS
		tmp := make(map[string]int)
		for f := range fileCh {
			tmp[f.path] |= f.source
			// Delete entries as soon as we've seen them
			// originate from both sources.
			if tmp[f.path] == allSources {
				delete(tmp, f.path)
			}
			if f.source == SourceFS {
				outCh <- f
			}
		}
		for path, value := range tmp {
			if value == SourceDB {
				log.Printf("removing file %s", path)
				db.DeleteFile(path)
			}
		}
		close(outCh)
	}()

	return outCh
}

// extractor consumes files from fileCh, skips those already known to
// db that have not changed, and parses metadata for the rest.
// Successfully parsed files are sent to outCh; files that fail to
// parse are stored in db with the error flag set.
func extractor(db *Database, chooser MetadataChooserFunc, fileCh chan fileData, outCh chan fileAndBook) {
	for f := range fileCh {
		if oldfile, err := db.GetFile(f.path); err == nil {
			if !oldfile.HasChanged(f.info) {
				continue
			}
			// Keep the existing ID so the book is updated in place.
			f.id = oldfile.Id
		}

		book, filetype, err := parseMeta(f, chooser)
		if err == nil {
			f.filetype = filetype
			outCh <- fileAndBook{f: f, b: book}
			continue
		}

		// Parse errors are permanent.
		log.Printf("Could not parse %s: %v", f.path, err)
		file := f.toLiberFile(true)
		if err := db.PutFile(file); err != nil {
			log.Printf("Error saving file %s to db: %v", file.Path, err)
		}
	}
}

// writeCoverFile writes data to a newly created file at path. If the
// write or the close fails, the partial file is removed and the
// error is returned, so a truncated cover is never left behind.
func writeCoverFile(path string, data []byte) error {
	imgf, err := os.Create(path)
	if err != nil {
		return err
	}
	if _, err := imgf.Write(data); err != nil {
		// The write error is the one worth reporting; cleanup
		// failures are deliberately ignored.
		imgf.Close()
		os.Remove(path)
		return err
	}
	if err := imgf.Close(); err != nil {
		// Cleanup failure is deliberately ignored here as well.
		os.Remove(path)
		return err
	}
	return nil
}

// parseMeta extracts book metadata from the file at f.path and
// returns the book together with the detected file type.
//
// Metadata parsed from the file is merged with a Calibre OPF file if
// one can be opened, otherwise with a Google Books lookup (asking
// chooser when there is more than one candidate). An error is
// returned if the file cannot be parsed or if the resulting metadata
// is not sufficient. A cover image is taken from the local cover path
// if it exists, otherwise it is fetched from Google Books and saved
// next to the file as "<path>.cover.png"; failure to obtain a cover
// is not an error.
func parseMeta(f fileData, chooser MetadataChooserFunc) (*Book, string, error) {
	// Attempt direct metadata extraction.
	book, filetype, err := Parse(f.path)
	if err != nil {
		return nil, "", err
	}

	// Check if a Calibre OPF file exists.
	if opfmeta, err := opfOpen(opfMetadataPath(f.path)); err == nil {
		book.Metadata.Merge(opfmeta)
	} else {
		// No local metadata, use Google Books to retrieve
		// more information on the book. Ask the user to
		// choose in case there are multiple results.
		candidates, err := LookupGoogleBooks(book.Metadata)
		if err == nil && len(candidates) > 0 {
			if len(candidates) == 1 {
				log.Printf("found Google Books match: %s", candidates[0].String())
				book.Metadata.Merge(candidates[0])
			} else {
				if userchoice := chooser(f.path, candidates); userchoice != nil {
					book.Metadata.Merge(userchoice)
				}
			}
		}
	}

	// Check if the book metadata looks ok. If not, don't even
	// bother looking for a cover image.
	if !book.Metadata.Sufficient() {
		return nil, "", errors.New("insufficient metadata")
	}

	// Try to find a cover image. Look on the local filesystem
	// first, otherwise check Google Books.
	localCoverPath := opfCoverPath(f.path)
	if _, err := os.Stat(localCoverPath); err == nil {
		book.CoverPath = localCoverPath
	} else if imageData, err := GetGoogleBooksCover(book.Metadata); err == nil {
		imageFileName := f.path + ".cover.png"
		// Only record the cover path once the image has been
		// written out completely.
		if err := writeCoverFile(imageFileName, imageData); err != nil {
			log.Printf("Could not save cover image for %d: %v", book.Id, err)
		} else {
			book.CoverPath = imageFileName
		}
	}

	return book, filetype, nil
}

// dbwriter consumes parsed files from ch and stores them in db. A
// file without an ID is matched against existing books through its
// unique metadata keys: on a match the file is attached to the
// existing book, whose metadata is left untouched; otherwise a new
// book ID is assigned. A file that already has an ID overwrites the
// metadata of its book.
func dbwriter(db *Database, ch chan fileAndBook) {
	for pair := range ch {
		saveBook := true

		// If this is a new file, see if it matches an already
		// existing book.
		if pair.f.id == 0 {
			log.Printf("potential new book: %#v", pair.b.Metadata)
			if match, err := db.Find(pair.b.Metadata.Uniques()); err == nil {
				log.Printf("%s matches existing book %d", pair.f.path, match.Id)
				// Ignore new metadata.
				pair.b = match
				saveBook = false
			} else {
				// Assign a new ID to the book.
				pair.b.Id = NewID()
			}
			pair.f.id = pair.b.Id
		} else {
			// Overwrite the old book metadata.
			pair.b.Id = pair.f.id
		}

		if saveBook {
			if err := db.PutBook(pair.b); err != nil {
				log.Printf("Error saving book %d to db: %v", pair.b.Id, err)
				// Do not record the file if its book could not be saved.
				continue
			}
			log.Printf("%s -> %d", pair.f.path, pair.b.Id)
		}

		file := pair.f.toLiberFile(false)
		if err := db.PutFile(file); err != nil {
			log.Printf("Error saving file %s to db: %v", file.Path, err)
		}
	}
}

// Update synchronizes the database with the contents of dir. New and
// changed files are parsed and stored, and files that are no longer
// present on the filesystem are removed from the database. chooser
// is called when a metadata lookup returns more than one candidate.
// Update returns once every file has been processed.
func (db *Database) Update(dir string, chooser MetadataChooserFunc) {
	// Parallelize metadata extraction, serialize database updates
	// (so that index-based de-duplication works).
	var wg sync.WaitGroup
	ch := differ(db, dir)
	pch := make(chan fileAndBook)
	for i := 0; i < 10; i++ {
		wg.Add(1)
		go func() {
			extractor(db, chooser, ch, pch)
			wg.Done()
		}()
	}

	// Close the writer's input once all extractors have finished.
	go func() {
		wg.Wait()
		close(pch)
	}()

	dbwriter(db, pch)
}