From 3599ba3c2e61f3115ebe6a5aa2a2392acf1f5a1f Mon Sep 17 00:00:00 2001
From: ale <ale@incal.net>
Date: Sun, 9 Nov 2014 09:39:17 +0000
Subject: [PATCH] Calibre integration

---
 README.md                   |  7 ++++
 files.go                    |  4 ++
 htdocs/templates/index.html |  4 +-
 metadata.go                 |  5 +++
 opf.go                      | 84 +++++++++++++++++++++++++++++++++++++
 opf_test.go                 | 56 +++++++++++++++++++++++++
 update.go                   | 71 +++++++++++++++++++++----------
 7 files changed, 206 insertions(+), 25 deletions(-)
 create mode 100644 opf.go
 create mode 100644 opf_test.go

diff --git a/README.md b/README.md
index 06a1b9d..6a0bcbe 100644
--- a/README.md
+++ b/README.md
@@ -84,6 +84,13 @@ You can run `liber --update` as many times as you like (for example
 whenever you add an ebook to your collection), it will automatically
 detect new files and files that have been removed.
 
+#### Integration with Calibre
+
+If you use Calibre to maintain your ebook collection, you can index it
+with `liber` by simply pointing its `--book-dir` option at the Calibre
+library directory. `liber` will read Calibre metadata files and cover
+images, and it will not perform remote searches for book metadata.
+
 ### Searching
 
 You can search the index from the command line, for example:
diff --git a/files.go b/files.go
index 1346d16..013f2e4 100644
--- a/files.go
+++ b/files.go
@@ -3,6 +3,7 @@ package liber
 import (
 	"os"
 	"path/filepath"
+	"strings"
 )
 
 type FileStorage struct {
@@ -35,5 +36,8 @@ func (s *FileStorage) Create(path string) (*os.File, error) {
 
 // Open a file.
 func (s *FileStorage) Open(path string) (*os.File, error) {
+	if strings.HasPrefix(path, "/") { // paths starting with "/" are opened as-is and are not joined to s.Root
+		return os.Open(path) // NOTE(review): the "/" test is POSIX-only; confirm callers never pass untrusted absolute paths
+	}
 	return os.Open(filepath.Join(s.Root, path))
 }
diff --git a/htdocs/templates/index.html b/htdocs/templates/index.html
index 041de1f..58bd030 100644
--- a/htdocs/templates/index.html
+++ b/htdocs/templates/index.html
@@ -9,8 +9,6 @@
   </head>
 
   <body>
-    <script>var on_load = null;</script>
-
     <div class="site-wrapper">
       <div class="site-wrapper-inner">
         <div class="cover-container">
@@ -30,7 +28,7 @@
                     autocomplete="off" action="/search">
                 <input type="text" class="input-lg form-control"
                        id="search_field" data-provide="typeahead"
-                       name="q" size="30">
+                       name="q" size="30" autofocus>
                 <button type="submit" class="btn btn-lg btn-primary">
                   Cerca
                 </button>
diff --git a/metadata.go b/metadata.go
index 630586b..d762b6e 100644
--- a/metadata.go
+++ b/metadata.go
@@ -29,6 +29,11 @@ type Metadata struct {
 	Sources     []MetadataSource
 }
 
+// Sufficient reports whether the metadata is usable, i.e. it has a non-empty title.
+func (m *Metadata) Sufficient() bool {
+	return len(m.Title) > 0
+}
+
 // Uniques returns the list of possible unique tokens for this book.
 func (m *Metadata) Uniques() []string {
 	var out []string
diff --git a/opf.go b/opf.go
new file mode 100644
index 0000000..e57e849
--- /dev/null
+++ b/opf.go
@@ -0,0 +1,84 @@
+package liber
+
+import (
+	"encoding/xml"
+	"io"
+	"os"
+	"path/filepath"
+	"strings"
+)
+
+type opfIdentifier struct { // one <dc:identifier> element: its opf:scheme attribute plus its text content
+	Scheme string `xml:"http://www.idpf.org/2007/opf scheme,attr"` // e.g. "ISBN" or "uuid", the two schemes ToMetadata looks at
+	Value  string `xml:",chardata"`
+}
+
+type opfMeta struct { // <metadata> element of an OPF package; only the Dublin Core children listed here are decoded
+	XMLName     xml.Name        `xml:"http://www.idpf.org/2007/opf metadata"`
+	Title       string          `xml:"http://purl.org/dc/elements/1.1/ title"`
+	Date        string          `xml:"http://purl.org/dc/elements/1.1/ date"` // raw dc:date text; ToMetadata passes it through toYear
+	Description string          `xml:"http://purl.org/dc/elements/1.1/ description"`
+	Creator     []string        `xml:"http://purl.org/dc/elements/1.1/ creator"` // one entry per element; ToMetadata splits each entry on ","
+	Language    []string        `xml:"http://purl.org/dc/elements/1.1/ language"`
+	Publisher   []string        `xml:"http://purl.org/dc/elements/1.1/ publisher"`
+	Identifier  []opfIdentifier `xml:"http://purl.org/dc/elements/1.1/ identifier"` // ToMetadata only uses the "ISBN" and "uuid" schemes
+}
+
+type opfPackage struct { // decode target for the root element of an OPF document (see opfParse)
+	Meta *opfMeta // the <metadata> child; encoding/xml only sets it when that element is present
+}
+
+// ToMetadata converts the decoded OPF fields into a Metadata tagged with an "opf" source.
+func (o *opfMeta) ToMetadata() *Metadata {
+	m := &Metadata{
+		Title:       o.Title,
+		Description: o.Description,
+		Publisher:   o.Publisher,
+		Language:    o.Language,
+	}
+	if o.Date != "" && o.Date != "0101-01-01T00:00:00+00:00" { // presumably Calibre's "no date" placeholder; TODO confirm
+		m.Date = toYear(o.Date)
+	}
+	for _, c := range o.Creator {
+		for _, cc := range strings.Split(c, ",") { // a single element may list several comma-separated authors
+			if cc = strings.TrimSpace(cc); cc != "" { // skip blanks produced by "A, B," or an empty <dc:creator/>
+				m.Creator = append(m.Creator, cc)
+			}
+		}
+	}
+	var uuid string
+	for _, id := range o.Identifier {
+		if id.Scheme == "ISBN" {
+			m.ISBN = append(m.ISBN, id.Value)
+		} else if id.Scheme == "uuid" {
+			uuid = id.Value // the last uuid identifier wins; it becomes the source ID below
+		}
+	}
+	m.Sources = []MetadataSource{{Name: "opf", ID: uuid}}
+	return m
+}
+
+func opfParse(r io.Reader) (*Metadata, error) { // decodes one OPF document from r and converts it to Metadata
+	opf := opfPackage{Meta: new(opfMeta)} // pre-allocated: Decode leaves Meta nil when <metadata> is absent, which made the call below panic
+	if err := xml.NewDecoder(r).Decode(&opf); err != nil {
+		return nil, err
+	}
+	return opf.Meta.ToMetadata(), nil // a document without <metadata> now yields empty metadata instead of a nil dereference
+}
+
+func opfOpen(path string) (*Metadata, error) { // opens the OPF file at path and hands it to opfParse
+	f, openErr := os.Open(path)
+	if openErr != nil {
+		return nil, openErr
+	}
+	defer f.Close() // the handle is only read from, so it is released once parsing is done
+	return opfParse(f)
+}
+
+func opfMetadataPath(epubPath string) string { // path of "metadata.opf" in the same directory as epubPath
+	return filepath.Join(filepath.Dir(epubPath), "metadata.opf")
+}
+
+func opfCoverPath(epubPath string) string { // path of "cover.jpg" in the same directory as epubPath
+	return filepath.Join(filepath.Dir(epubPath), "cover.jpg")
+}
diff --git a/opf_test.go b/opf_test.go
new file mode 100644
index 0000000..61b59d4
--- /dev/null
+++ b/opf_test.go
@@ -0,0 +1,56 @@
+package liber
+
+import (
+	"reflect"
+	"strings"
+	"testing"
+)
+
+var testOpf = `<?xml version='1.0' encoding='utf-8'?>
+<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="uuid_id">
+    <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
+        <dc:identifier opf:scheme="calibre" id="calibre_id">3</dc:identifier>
+        <dc:identifier opf:scheme="uuid" id="uuid_id">96e4a1ff-dd24-4966-8fe9-2d16a14b9fb0</dc:identifier>
+        <dc:title>Cypherpunks: Freedom and the Future of the Internet</dc:title>
+        <dc:creator opf:file-as="Julian Assange, Jacob Appelbaum, Andy Muller-Maguhn, Jeremie Zimmermann" opf:role="aut">Julian Assange, Jacob Appelbaum, Andy Muller-Maguhn, Jeremie Zimmermann</dc:creator>
+        <dc:contributor opf:file-as="calibre" opf:role="bkp">calibre (0.9.18) [http://calibre-ebook.com]</dc:contributor>
+        <dc:date>2012-11-25T23:00:00+00:00</dc:date>
+        <dc:description>&lt;div&gt;&lt;p class="description"&gt;The harassment of WikiLeaks and other Internet activists, together with attempts to introduce anti-file sharing legislation such as SOPA and ACTA, indicate that the politics of the Internet have reached a crossroads. In one direction lies a future that guarantees, in the watchwords of the cypherpunks, “privacy for the weak and transparency for the powerful”; in the other lies an Internet that allows government and large corporations to discover ever more about internet users while hiding their own activities. Assange and his co-discussants unpick the complex issues surrounding this crucial choice with clarity and engaging enthusiasm.&lt;/p&gt;&lt;p class="description"&gt;released by the CypherTeam&lt;/p&gt;&lt;/div&gt;</dc:description>
+        <dc:publisher>OR Books</dc:publisher>
+        <dc:identifier opf:scheme="MOBI-ASIN">6de78a57-3b52-45fe-9670-5621d44582d7</dc:identifier>
+        <dc:identifier opf:scheme="ISBN">9781939293015</dc:identifier>
+        <dc:language>fra</dc:language>
+        <dc:subject>Bisac Code 1: POL039000</dc:subject>
+        <meta content="{&quot;Julian Assange, Jacob Appelbaum, Andy Muller-Maguhn, Jeremie Zimmermann&quot;: &quot;&quot;}" name="calibre:author_link_map"/>
+        <meta content="2013-08-24T12:13:18+00:00" name="calibre:timestamp"/>
+        <meta content="Cypherpunks: Freedom and the Future of the Internet" name="calibre:title_sort"/>
+    </metadata>
+    <guide>
+        <reference href="cover.jpg" title="Cover" type="cover"/>
+    </guide>
+</package>
+`
+
+func TestOpf_Parse(t *testing.T) { // parses the testOpf fixture and compares every extracted field
+	want := &Metadata{
+		Title:       "Cypherpunks: Freedom and the Future of the Internet",
+		Date:        "2012",
+		Description: "<div><p class=\"description\">The harassment of WikiLeaks and other Internet activists, together with attempts to introduce anti-file sharing legislation such as SOPA and ACTA, indicate that the politics of the Internet have reached a crossroads. In one direction lies a future that guarantees, in the watchwords of the cypherpunks, “privacy for the weak and transparency for the powerful”; in the other lies an Internet that allows government and large corporations to discover ever more about internet users while hiding their own activities. Assange and his co-discussants unpick the complex issues surrounding this crucial choice with clarity and engaging enthusiasm.</p><p class=\"description\">released by the CypherTeam</p></div>",
+		ISBN:        []string{"9781939293015"},
+		Creator:     []string{"Julian Assange", "Jacob Appelbaum", "Andy Muller-Maguhn", "Jeremie Zimmermann"},
+		Publisher:   []string{"OR Books"},
+		Language:    []string{"fra"},
+		Sources: []MetadataSource{{
+			Name: "opf",
+			ID:   "96e4a1ff-dd24-4966-8fe9-2d16a14b9fb0",
+		}},
+	}
+
+	got, err := opfParse(strings.NewReader(testOpf))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !reflect.DeepEqual(got, want) {
+		t.Fatalf("Result does not match: expected=%#v, got=%#v", want, got)
+	}
+}
diff --git a/update.go b/update.go
index 4511aad..b97c2bd 100644
--- a/update.go
+++ b/update.go
@@ -1,6 +1,7 @@
 package liber
 
 import (
+	"errors"
 	"log"
 	"os"
 	"sync"
@@ -66,6 +67,8 @@ func differ(db *Database, basedir string) chan fileData {
 	outCh := make(chan fileData, 100)
 	var wg sync.WaitGroup
 	wg.Add(2)
+
+	// Start two sources in parallel and send their output to fileCh.
 	go func() {
 		localFileScanner(db, basedir, fileCh)
 		wg.Done()
@@ -74,22 +77,27 @@ func differ(db *Database, basedir string) chan fileData {
 		dbFileScanner(db, fileCh)
 		wg.Done()
 	}()
+	// Once they are done, close the channel.
 	go func() {
 		wg.Wait()
 		close(fileCh)
 	}()
 	go func() {
-		// Yeah we'll end up more or less keeping the entire
-		// db in memory in order to detect which files went
-		// away...
+		// Detect files that have not changed, i.e. appear in
+		// the database and the filesystem. Keep track of book
+		// IDs so that once all entries have been processed we
+		// can delete those books from the database where the
+		// original file has been removed.
 		allSources := SourceDB | SourceFS
 		tmp := make(map[string]int)
 		ids := make(map[string]BookId)
 		for f := range fileCh {
-			log.Printf("differ: %#v", f)
+			// log.Printf("differ: %#v", f)
 			tmp[f.path] |= f.source
+			// Delete entries as soon as we've seen them
+			// from both sources.
 			if tmp[f.path] == allSources {
-				log.Printf("differ: dropping %s", f.path)
+				// log.Printf("differ: dropping %s", f.path)
 				delete(tmp, f.path)
 				delete(ids, f.path)
 			}
@@ -121,9 +129,9 @@ func adder(db *Database, chooser MetadataChooserFunc, fileCh chan fileData) {
 		}
 
 		var err error
-		f.id, err = addNewBook(db, f, oldid, chooser)
+		f.id, err = importBook(db, f, oldid, chooser)
 		if err != nil {
-			log.Printf("Could not add %s: % v", f.path, err)
+			log.Printf("Could not add %s: %v", f.path, err)
 			continue
 		}
 
@@ -134,32 +142,50 @@ func adder(db *Database, chooser MetadataChooserFunc, fileCh chan fileData) {
 	}
 }
 
-func addNewBook(db *Database, f fileData, oldid BookId, chooser MetadataChooserFunc) (BookId, error) {
+func importBook(db *Database, f fileData, oldid BookId, chooser MetadataChooserFunc) (BookId, error) {
+	// Attempt direct metadata extraction.
 	book, err := Parse(f.path)
 	if err != nil {
 		return 0, err
 	}
 
-	candidates, err := LookupGoogleBooks(book.Metadata)
-	if err == nil && len(candidates) > 0 {
-		if len(candidates) == 1 {
-			log.Printf("found Google Books match: %s", candidates[0].String())
-			book.Metadata.Merge(candidates[0])
-		} else {
-			if userchoice := chooser(f.path, candidates); userchoice != nil {
-				book.Metadata.Merge(userchoice)
-			}
-		}
-	}
-
 	if oldid != 0 {
 		book.Id = oldid
 	} else {
 		book.Id = NewID()
 	}
 
-	// Try to find a cover image.
-	if imageData, err := GetGoogleBooksCover(book.Metadata); err == nil {
+	// Check if a Calibre OPF file exists.
+	if opfmeta, err := opfOpen(opfMetadataPath(f.path)); err == nil {
+		book.Metadata.Merge(opfmeta)
+	} else {
+		// No local metadata, use Google Books to retrieve
+		// more information on the book. Ask the user to
+		// choose in case there are multiple results.
+		candidates, err := LookupGoogleBooks(book.Metadata)
+		if err == nil && len(candidates) > 0 {
+			if len(candidates) == 1 {
+				log.Printf("found Google Books match: %s", candidates[0].String())
+				book.Metadata.Merge(candidates[0])
+			} else {
+				if userchoice := chooser(f.path, candidates); userchoice != nil {
+					book.Metadata.Merge(userchoice)
+				}
+			}
+		}
+	}
+
+	// Check if the book metadata looks ok.
+	if !book.Metadata.Sufficient() {
+		return 0, errors.New("insufficient metadata")
+	}
+
+	// Try to find a cover image. Look on the local filesystem
+	// first, otherwise check Google Books.
+	localCoverPath := opfCoverPath(f.path)
+	if _, err := os.Stat(localCoverPath); err == nil {
+		book.CoverPath = localCoverPath
+	} else if imageData, err := GetGoogleBooksCover(book.Metadata); err == nil {
 		imageFileName := book.Path + ".cover.png"
 		if imgf, err := os.Create(imageFileName); err != nil {
 			log.Printf("Could not save cover image for %d: %v", book.Id, err)
@@ -170,6 +196,7 @@ func addNewBook(db *Database, f fileData, oldid BookId, chooser MetadataChooserF
 		}
 	}
 
+	// Save the book in our database.
 	if err := db.PutBook(book); err != nil {
 		return book.Id, err
 	}
-- 
GitLab