update.go 7.96 KB
Newer Older
ale's avatar
ale committed
1 2 3
package liber

import (
ale's avatar
ale committed
4
	"errors"
ale's avatar
ale committed
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
	"log"
	"os"
	"sync"

	"git.autistici.org/ale/liber/util"
)

// Bit flags recording where a file was seen during an update scan. The
// differ ORs them together per path to tell apart files that exist only in
// the database, only on the filesystem, or in both.
const (
	// SourceDB marks an entry read back from the database (value 1).
	SourceDB = 1 << iota
	// SourceFS marks an entry found by walking the filesystem (value 2).
	SourceFS
)

// MetadataChooserFunc picks one Metadata out of several candidates. In this
// file it is invoked by parseMeta with the path of the file being processed
// and the candidate list returned by a refiner when that list has more than
// one entry; a nil result means no candidate is merged.
type MetadataChooserFunc func(string, []*Metadata) *Metadata

type fileData struct {
ale's avatar
ale committed
20 21
	source   int
	path     string
22
	relpath  string
ale's avatar
ale committed
23 24 25
	filetype string
	id       BookId
	info     os.FileInfo
ale's avatar
ale committed
26 27
}

28
func (f fileData) toLiberFile(storage *FileStorage, haserr bool) (*File, error) {
ale's avatar
ale committed
29
	return &File{
ale's avatar
ale committed
30 31 32 33 34 35
		Path:     f.path,
		FileType: f.filetype,
		Mtime:    f.info.ModTime(),
		Size:     f.info.Size(),
		Id:       f.id,
		Error:    haserr,
36
	}, nil
ale's avatar
ale committed
37 38
}

ale's avatar
ale committed
39 40 41 42 43
// fileAndBook pairs a scanned file with the Book built from its metadata.
// It is the unit of work handed from the extractor goroutines to dbwriter.
type fileAndBook struct {
	f fileData // the file the metadata was extracted from
	b *Book    // book built by parseMeta; dbwriter assigns its Id
}

44
type updateContext struct {
45 46 47 48 49
	db        *Database
	storage   *FileStorage
	chooser   MetadataChooserFunc
	providers []MetadataProvider
	refiners  []MetadataRefiner
50 51 52
}

func (uc *updateContext) dbFileScanner(fileCh chan fileData) {
53 54 55
	iter := uc.db.Scan(FileBucket)
	defer iter.Close()
	for iter.Next() {
ale's avatar
ale committed
56 57 58 59 60 61 62 63 64 65 66 67
		var f File
		if err := iter.Value(&f); err != nil {
			continue
		}
		fileCh <- fileData{
			source: SourceDB,
			path:   f.Path,
			id:     f.Id,
		}
	}
}

68 69
func (uc *updateContext) localFileScanner(basedir string, fileCh chan fileData) {
	uc.storage.Walk(util.NewDefaultWalker(), func(path string, info os.FileInfo, err error) error {
ale's avatar
ale committed
70 71 72 73 74 75 76 77 78
		fileCh <- fileData{
			source: SourceFS,
			path:   path,
			info:   info,
		}
		return nil
	})
}

79
func (uc *updateContext) differ(basedir string) chan fileData {
ale's avatar
ale committed
80 81 82 83
	fileCh := make(chan fileData, 100)
	outCh := make(chan fileData, 100)
	var wg sync.WaitGroup
	wg.Add(2)
ale's avatar
ale committed
84 85

	// Start two sources in parallel and send their output to fileCh.
ale's avatar
ale committed
86
	go func() {
87
		uc.localFileScanner(basedir, fileCh)
ale's avatar
ale committed
88 89 90
		wg.Done()
	}()
	go func() {
91
		uc.dbFileScanner(fileCh)
ale's avatar
ale committed
92 93
		wg.Done()
	}()
ale's avatar
ale committed
94
	// Once they are done, close the channel.
ale's avatar
ale committed
95 96 97 98 99
	go func() {
		wg.Wait()
		close(fileCh)
	}()
	go func() {
ale's avatar
ale committed
100 101 102 103 104
		// Merge the two sources and keep track of files that
		// only appear in the database but not on the
		// filesystem, so we can remove them at the end.
		// All entries with source == SourceFS will be sent to
		// the output channel in any case.
ale's avatar
ale committed
105 106 107 108
		allSources := SourceDB | SourceFS
		tmp := make(map[string]int)
		for f := range fileCh {
			tmp[f.path] |= f.source
ale's avatar
ale committed
109
			// Delete entries as soon as we've seen them
ale's avatar
ale committed
110
			// originate from both sources.
ale's avatar
ale committed
111 112 113 114 115 116 117 118 119
			if tmp[f.path] == allSources {
				delete(tmp, f.path)
			}
			if f.source == SourceFS {
				outCh <- f
			}
		}
		for path, value := range tmp {
			if value == SourceDB {
120 121
				log.Printf("file %s has been removed", path)
				uc.db.DeleteFile(path)
ale's avatar
ale committed
122 123 124 125 126 127 128
			}
		}
		close(outCh)
	}()
	return outCh
}

129
func (uc *updateContext) extractor(fileCh chan fileData, outCh chan fileAndBook) {
ale's avatar
ale committed
130
	for f := range fileCh {
131
		if oldfile, err := uc.db.GetFile(f.path); err == nil {
ale's avatar
ale committed
132 133 134
			if !oldfile.HasChanged(f.info) {
				continue
			}
ale's avatar
ale committed
135
			f.id = oldfile.Id
ale's avatar
ale committed
136
		}
137
		book, filetype, err := uc.parseMeta(f)
ale's avatar
ale committed
138 139 140
		if err == nil {
			f.filetype = filetype
			outCh <- fileAndBook{f: f, b: book}
ale's avatar
ale committed
141 142 143
			continue
		}

ale's avatar
ale committed
144 145
		// Parse errors are permanent.
		log.Printf("Could not parse %s: %v", f.path, err)
146 147 148 149 150 151
		file, err := f.toLiberFile(uc.storage, true)
		if err != nil {
			log.Printf("Error saving file %s: %v", file.Path, err)
			continue
		}
		if err := uc.db.PutFile(file); err != nil {
ale's avatar
ale committed
152
			log.Printf("Error saving file %s to db: %v", file.Path, err)
ale's avatar
ale committed
153 154 155 156
		}
	}
}

157
func (uc *updateContext) parseMeta(f fileData) (*Book, string, error) {
158
	filetype, err := GetFileType(f.path)
ale's avatar
ale committed
159
	if err != nil {
ale's avatar
ale committed
160
		return nil, "", err
ale's avatar
ale committed
161 162
	}

163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229
	// Attempt metadata extraction from the providers. The first
	// match returned stops the iteration. At the same time, look
	// for a cover image until one is found.
	var meta *Metadata
	var coverPath string

	for _, provider := range uc.providers {
		if meta == nil {
			meta, err = provider.Lookup(uc.storage, f.path, filetype)
			if err != nil {
				log.Printf("%s: %s: could not parse: %v", f.path, provider.Name(), err)
			} else if meta != nil {
				log.Printf("%s: identified by: %s", f.path, provider.Name())
			}
		}
		if coverPath == "" {
			coverPath, err = provider.GetBookCover(uc.storage, f.path)
			if err != nil {
				log.Printf("%s: %s: could not fetch cover image at %s", f.path, provider.Name(), err)
			} else if coverPath != "" {
				log.Printf("%s: cover image found by: %s", f.path, provider.Name())
			}
		}
	}
	if meta == nil {
		return nil, "", errors.New("no metadata could be identified")
	}

	// If the book cover couldn't be found locally, prepare to
	// download it. It's possible that we've already done this, so
	// check in the storage first (TODO: this check isn't useful,
	// if the cover exists it should have been emitted by the
	// fileProvider above).
	localCoverPath := f.path + ".cover.png"
	if coverPath == "" && uc.storage.Exists(localCoverPath) {
		coverPath = localCoverPath
	}

	// Only run remote checks if the metadata isn't complete.
	if !meta.Complete() {

		// Integrate metadata using the refiners. We check them all,
		// and merge their results into the metadata object. The user
		// is prompted if a choice is necessary. Search for a book
		// cover only until one is found.
		for _, refiner := range uc.refiners {
			candidates, err := refiner.Lookup(meta)
			if err == nil && len(candidates) > 0 {
				if len(candidates) == 1 {
					log.Printf("found match from %s: %s", refiner.Name(), candidates[0].String())
					meta.Merge(candidates[0])
				} else if uc.chooser != nil {
					if userchoice := uc.chooser(f.path, candidates); userchoice != nil {
						meta.Merge(userchoice)
					}
				}
			}

			if coverPath == "" {
				if coverData, err := refiner.GetBookCover(meta); err == nil {
					if imgf, err := os.Create(uc.storage.Abs(localCoverPath)); err != nil {
						log.Printf("Error saving cover image: %v", err)
					} else {
						imgf.Write(coverData)
						imgf.Close()
						coverPath = localCoverPath
					}
ale's avatar
ale committed
230 231 232
				}
			}
		}
233

ale's avatar
ale committed
234 235
	}

ale's avatar
ale committed
236 237
	// Check if the book metadata looks ok. If not, don't even
	// bother looking for a cover image.
238
	if !meta.Sufficient() {
ale's avatar
ale committed
239
		return nil, "", errors.New("insufficient metadata")
ale's avatar
ale committed
240 241
	}

242 243 244 245
	// Create a Book with no ID (yet).
	book := &Book{
		Metadata:  meta,
		CoverPath: coverPath,
ale's avatar
ale committed
246 247
	}

ale's avatar
ale committed
248 249 250
	return book, filetype, nil
}

251
func (uc *updateContext) dbwriter(ch chan fileAndBook) {
ale's avatar
ale committed
252 253 254 255 256 257 258
	for pair := range ch {
		saveBook := true

		// If this is a new file, see if it matches an already
		// existing book.
		if pair.f.id == 0 {
			log.Printf("potential new book: %#v", pair.b.Metadata)
259
			if match, err := uc.db.Find(pair.b.Metadata.Uniques()); err == nil {
ale's avatar
ale committed
260 261 262 263 264 265 266 267 268 269 270 271 272 273 274
				log.Printf("%s matches existing book %d", pair.f.path, match.Id)
				// Ignore new metadata.
				pair.b = match
				saveBook = false
			} else {
				// Assign a new ID to the book.
				pair.b.Id = NewID()
			}
			pair.f.id = pair.b.Id
		} else {
			// Overwrite the old book metadata.
			pair.b.Id = pair.f.id
		}

		if saveBook {
275
			if err := uc.db.PutBook(pair.b); err != nil {
ale's avatar
ale committed
276 277 278 279 280 281
				log.Printf("Error saving book %d to db: %v", pair.b.Id, err)
				continue
			}
			log.Printf("%s -> %d", pair.f.path, pair.b.Id)
		}

282 283 284 285 286 287
		file, err := pair.f.toLiberFile(uc.storage, false)
		if err != nil {
			log.Printf("Error saving file %s: %v", pair.f.path, err)
			continue
		}
		if err := uc.db.PutFile(file); err != nil {
ale's avatar
ale committed
288 289
			log.Printf("Error saving file %s to db: %v", file.Path, err)
		}
ale's avatar
ale committed
290 291 292 293
	}
}

func (db *Database) Update(dir string, chooser MetadataChooserFunc) {
ale's avatar
ale committed
294 295
	// Parallelize metadata extraction, serialize database updates
	// (so that index-based de-duplication works).
296 297 298 299
	uc := &updateContext{
		db:      db,
		chooser: chooser,
		storage: NewFileStorage(dir),
300 301 302 303 304 305 306 307 308 309 310

		// Calibre/OPF must be first, so we don't attempt to
		// parse the file itself.
		providers: []MetadataProvider{
			&opfProvider{},
			&fileProvider{},
		},

		// Check Google Books when the metadata is not
		// sufficient to fully describe the book.
		refiners: []MetadataRefiner{
311
			//&openLibraryRefiner{},
312 313
			&googleBooksRefiner{},
		},
314 315
	}

ale's avatar
ale committed
316
	var wg sync.WaitGroup
317
	ch := uc.differ(dir)
ale's avatar
ale committed
318
	pch := make(chan fileAndBook)
ale's avatar
ale committed
319 320 321
	for i := 0; i < 10; i++ {
		wg.Add(1)
		go func() {
322
			uc.extractor(ch, pch)
ale's avatar
ale committed
323 324 325
			wg.Done()
		}()
	}
ale's avatar
ale committed
326 327 328 329
	go func() {
		wg.Wait()
		close(pch)
	}()
330
	uc.dbwriter(pch)
ale's avatar
ale committed
331
}