From 3599ba3c2e61f3115ebe6a5aa2a2392acf1f5a1f Mon Sep 17 00:00:00 2001 From: ale <ale@incal.net> Date: Sun, 9 Nov 2014 09:39:17 +0000 Subject: [PATCH] Calibre integration --- README.md | 7 ++++ files.go | 4 ++ htdocs/templates/index.html | 4 +- metadata.go | 5 +++ opf.go | 84 +++++++++++++++++++++++++++++++++++++ opf_test.go | 56 +++++++++++++++++++++++++ update.go | 71 +++++++++++++++++++++---------- 7 files changed, 206 insertions(+), 25 deletions(-) create mode 100644 opf.go create mode 100644 opf_test.go diff --git a/README.md b/README.md index 06a1b9d..6a0bcbe 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,13 @@ You can run `liber --update` as many times as you like (for example whenever you add an ebook to your collection), it will automatically detect new files and files that have been removed. +#### Integration with Calibre + +If you use Calibre to maintain your ebook collection, you can index it +with `liber` by simply pointing its `--book-dir` option at the Calibre +library directory. `liber` will read Calibre metadata files and cover +images, and it will not perform remote searches for book metadata. + ### Searching You can search the index from the command line, for example: diff --git a/files.go b/files.go index 1346d16..013f2e4 100644 --- a/files.go +++ b/files.go @@ -3,6 +3,7 @@ package liber import ( "os" "path/filepath" + "strings" ) type FileStorage struct { @@ -35,5 +36,8 @@ func (s *FileStorage) Create(path string) (*os.File, error) { // Open a file. 
func (s *FileStorage) Open(path string) (*os.File, error) { + if strings.HasPrefix(path, "/") { + return os.Open(path) + } return os.Open(filepath.Join(s.Root, path)) } diff --git a/htdocs/templates/index.html b/htdocs/templates/index.html index 041de1f..58bd030 100644 --- a/htdocs/templates/index.html +++ b/htdocs/templates/index.html @@ -9,8 +9,6 @@ </head> <body> - <script>var on_load = null;</script> - <div class="site-wrapper"> <div class="site-wrapper-inner"> <div class="cover-container"> @@ -30,7 +28,7 @@ autocomplete="off" action="/search"> <input type="text" class="input-lg form-control" id="search_field" data-provide="typeahead" - name="q" size="30"> + name="q" size="30" autofocus> <button type="submit" class="btn btn-lg btn-primary"> Cerca </button> diff --git a/metadata.go b/metadata.go index 630586b..d762b6e 100644 --- a/metadata.go +++ b/metadata.go @@ -29,6 +29,11 @@ type Metadata struct { Sources []MetadataSource } +// Sufficient returns true if the object contains enough information. +func (m *Metadata) Sufficient() bool { + return m.Title != "" +} + // Uniques returns the list of possible unique tokens for this book. 
func (m *Metadata) Uniques() []string { var out []string diff --git a/opf.go b/opf.go new file mode 100644 index 0000000..e57e849 --- /dev/null +++ b/opf.go @@ -0,0 +1,84 @@ +package liber + +import ( + "encoding/xml" + "io" + "os" + "path/filepath" + "strings" +) + +type opfIdentifier struct { + Scheme string `xml:"http://www.idpf.org/2007/opf scheme,attr"` + Value string `xml:",chardata"` +} + +type opfMeta struct { + XMLName xml.Name `xml:"http://www.idpf.org/2007/opf metadata"` + Title string `xml:"http://purl.org/dc/elements/1.1/ title"` + Date string `xml:"http://purl.org/dc/elements/1.1/ date"` + Description string `xml:"http://purl.org/dc/elements/1.1/ description"` + Creator []string `xml:"http://purl.org/dc/elements/1.1/ creator"` + Language []string `xml:"http://purl.org/dc/elements/1.1/ language"` + Publisher []string `xml:"http://purl.org/dc/elements/1.1/ publisher"` + Identifier []opfIdentifier `xml:"http://purl.org/dc/elements/1.1/ identifier"` +} + +type opfPackage struct { + Meta *opfMeta +} + +func (o *opfMeta) ToMetadata() *Metadata { + m := &Metadata{ + Title: o.Title, + Description: o.Description, + Publisher: o.Publisher, + Language: o.Language, + } + if o.Date != "" && o.Date != "0101-01-01T00:00:00+00:00" { + m.Date = toYear(o.Date) + } + for _, c := range o.Creator { + for _, cc := range strings.Split(c, ",") { + m.Creator = append(m.Creator, strings.TrimSpace(cc)) + } + } + var uuid string + for _, id := range o.Identifier { + if id.Scheme == "ISBN" { + m.ISBN = append(m.ISBN, id.Value) + } else if id.Scheme == "uuid" { + uuid = id.Value + } + } + m.Sources = []MetadataSource{{ + Name: "opf", + ID: uuid, + }} + return m +} + +func opfParse(r io.Reader) (*Metadata, error) { + var opf opfPackage + if err := xml.NewDecoder(r).Decode(&opf); err != nil { + return nil, err + } + return opf.Meta.ToMetadata(), nil +} + +func opfOpen(path string) (*Metadata, error) { + file, err := os.Open(path) + if err != nil { + return nil, err + } + defer 
file.Close() + return opfParse(file) +} + +func opfMetadataPath(epubPath string) string { + return filepath.Join(filepath.Dir(epubPath), "metadata.opf") +} + +func opfCoverPath(epubPath string) string { + return filepath.Join(filepath.Dir(epubPath), "cover.jpg") +} diff --git a/opf_test.go b/opf_test.go new file mode 100644 index 0000000..61b59d4 --- /dev/null +++ b/opf_test.go @@ -0,0 +1,56 @@ +package liber + +import ( + "reflect" + "strings" + "testing" +) + +var testOpf = `<?xml version='1.0' encoding='utf-8'?> +<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="uuid_id"> + <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf"> + <dc:identifier opf:scheme="calibre" id="calibre_id">3</dc:identifier> + <dc:identifier opf:scheme="uuid" id="uuid_id">96e4a1ff-dd24-4966-8fe9-2d16a14b9fb0</dc:identifier> + <dc:title>Cypherpunks: Freedom and the Future of the Internet</dc:title> + <dc:creator opf:file-as="Julian Assange, Jacob Appelbaum, Andy Muller-Maguhn, Jeremie Zimmermann" opf:role="aut">Julian Assange, Jacob Appelbaum, Andy Muller-Maguhn, Jeremie Zimmermann</dc:creator> + <dc:contributor opf:file-as="calibre" opf:role="bkp">calibre (0.9.18) [http://calibre-ebook.com]</dc:contributor> + <dc:date>2012-11-25T23:00:00+00:00</dc:date> + <dc:description><div><p class="description">The harassment of WikiLeaks and other Internet activists, together with attempts to introduce anti-file sharing legislation such as SOPA and ACTA, indicate that the politics of the Internet have reached a crossroads. In one direction lies a future that guarantees, in the watchwords of the cypherpunks, “privacy for the weak and transparency for the powerful”; in the other lies an Internet that allows government and large corporations to discover ever more about internet users while hiding their own activities. 
Assange and his co-discussants unpick the complex issues surrounding this crucial choice with clarity and engaging enthusiasm.</p><p class="description">released by the CypherTeam</p></div></dc:description> + <dc:publisher>OR Books</dc:publisher> + <dc:identifier opf:scheme="MOBI-ASIN">6de78a57-3b52-45fe-9670-5621d44582d7</dc:identifier> + <dc:identifier opf:scheme="ISBN">9781939293015</dc:identifier> + <dc:language>fra</dc:language> + <dc:subject>Bisac Code 1: POL039000</dc:subject> + <meta content="{"Julian Assange, Jacob Appelbaum, Andy Muller-Maguhn, Jeremie Zimmermann": ""}" name="calibre:author_link_map"/> + <meta content="2013-08-24T12:13:18+00:00" name="calibre:timestamp"/> + <meta content="Cypherpunks: Freedom and the Future of the Internet" name="calibre:title_sort"/> + </metadata> + <guide> + <reference href="cover.jpg" title="Cover" type="cover"/> + </guide> +</package> +` + +func TestOpf_Parse(t *testing.T) { + result, err := opfParse(strings.NewReader(testOpf)) + if err != nil { + t.Fatal(err) + } + + expected := &Metadata{ + Title: "Cypherpunks: Freedom and the Future of the Internet", + Date: "2012", + Description: "<div><p class=\"description\">The harassment of WikiLeaks and other Internet activists, together with attempts to introduce anti-file sharing legislation such as SOPA and ACTA, indicate that the politics of the Internet have reached a crossroads. In one direction lies a future that guarantees, in the watchwords of the cypherpunks, “privacy for the weak and transparency for the powerful”; in the other lies an Internet that allows government and large corporations to discover ever more about internet users while hiding their own activities. 
Assange and his co-discussants unpick the complex issues surrounding this crucial choice with clarity and engaging enthusiasm.</p><p class=\"description\">released by the CypherTeam</p></div>", + ISBN: []string{"9781939293015"}, + Creator: []string{"Julian Assange", "Jacob Appelbaum", "Andy Muller-Maguhn", "Jeremie Zimmermann"}, + Publisher: []string{"OR Books"}, + Language: []string{"fra"}, + Sources: []MetadataSource{{ + Name: "opf", + ID: "96e4a1ff-dd24-4966-8fe9-2d16a14b9fb0", + }}, + } + if !reflect.DeepEqual(result, expected) { + t.Fatalf("Result does not match: expected=%#v, got=%#v", expected, result) + } +} diff --git a/update.go b/update.go index 4511aad..b97c2bd 100644 --- a/update.go +++ b/update.go @@ -1,6 +1,7 @@ package liber import ( + "errors" "log" "os" "sync" @@ -66,6 +67,8 @@ func differ(db *Database, basedir string) chan fileData { outCh := make(chan fileData, 100) var wg sync.WaitGroup wg.Add(2) + + // Start two sources in parallel and send their output to fileCh. go func() { localFileScanner(db, basedir, fileCh) wg.Done() @@ -74,22 +77,27 @@ func differ(db *Database, basedir string) chan fileData { dbFileScanner(db, fileCh) wg.Done() }() + // Once they are done, close the channel. go func() { wg.Wait() close(fileCh) }() go func() { - // Yeah we'll end up more or less keeping the entire - // db in memory in order to detect which files went - // away... + // Detect files that have not changed, i.e. appear in + // the database and the filesystem. Keep track of book + // IDs so that once all entries have been processed we + // can delete those books from the database where the + // original file has been removed. allSources := SourceDB | SourceFS tmp := make(map[string]int) ids := make(map[string]BookId) for f := range fileCh { - log.Printf("differ: %#v", f) + // log.Printf("differ: %#v", f) tmp[f.path] |= f.source + // Delete entries as soon as we've seen them + // from both sources. 
if tmp[f.path] == allSources { - log.Printf("differ: dropping %s", f.path) + // log.Printf("differ: dropping %s", f.path) delete(tmp, f.path) delete(ids, f.path) } @@ -121,9 +129,9 @@ func adder(db *Database, chooser MetadataChooserFunc, fileCh chan fileData) { } var err error - f.id, err = addNewBook(db, f, oldid, chooser) + f.id, err = importBook(db, f, oldid, chooser) if err != nil { - log.Printf("Could not add %s: % v", f.path, err) + log.Printf("Could not add %s: %v", f.path, err) continue } @@ -134,32 +142,50 @@ func adder(db *Database, chooser MetadataChooserFunc, fileCh chan fileData) { } } -func addNewBook(db *Database, f fileData, oldid BookId, chooser MetadataChooserFunc) (BookId, error) { +func importBook(db *Database, f fileData, oldid BookId, chooser MetadataChooserFunc) (BookId, error) { + // Attempt direct metadata extraction. book, err := Parse(f.path) if err != nil { return 0, err } - candidates, err := LookupGoogleBooks(book.Metadata) - if err == nil && len(candidates) > 0 { - if len(candidates) == 1 { - log.Printf("found Google Books match: %s", candidates[0].String()) - book.Metadata.Merge(candidates[0]) - } else { - if userchoice := chooser(f.path, candidates); userchoice != nil { - book.Metadata.Merge(userchoice) - } - } - } - if oldid != 0 { book.Id = oldid } else { book.Id = NewID() } - // Try to find a cover image. - if imageData, err := GetGoogleBooksCover(book.Metadata); err == nil { + // Check if a Calibre OPF file exists. + if opfmeta, err := opfOpen(opfMetadataPath(f.path)); err == nil { + book.Metadata.Merge(opfmeta) + } else { + // No local metadata, use Google Books to retrieve + // more information on the book. Ask the user to + // choose in case there are multiple results. 
+ candidates, err := LookupGoogleBooks(book.Metadata) + if err == nil && len(candidates) > 0 { + if len(candidates) == 1 { + log.Printf("found Google Books match: %s", candidates[0].String()) + book.Metadata.Merge(candidates[0]) + } else { + if userchoice := chooser(f.path, candidates); userchoice != nil { + book.Metadata.Merge(userchoice) + } + } + } + } + + // Check if the book metadata looks ok. + if !book.Metadata.Sufficient() { + return 0, errors.New("insufficient metadata") + } + + // Try to find a cover image. Look on the local filesystem + // first, otherwise check Google Books. + localCoverPath := opfCoverPath(f.path) + if _, err := os.Stat(localCoverPath); err == nil { + book.CoverPath = localCoverPath + } else if imageData, err := GetGoogleBooksCover(book.Metadata); err == nil { imageFileName := book.Path + ".cover.png" if imgf, err := os.Create(imageFileName); err != nil { log.Printf("Could not save cover image for %d: %v", book.Id, err) @@ -170,6 +196,7 @@ func addNewBook(db *Database, f fileData, oldid BookId, chooser MetadataChooserF } } + // Save the book in our database. if err := db.PutBook(book); err != nil { return book.Id, err } -- GitLab