Commit 3599ba3c authored by ale's avatar ale

Calibre integration

parent 7426f2ec
......@@ -84,6 +84,13 @@ You can run `liber --update` as many times as you like (for example
whenever you add an ebook to your collection), it will automatically
detect new files and files that have been removed.
#### Integration with Calibre
If you use Calibre to maintain your ebook collection, you can index it
with `liber` by simply pointing its `--book-dir` option at the Calibre
library directory. `liber` will read Calibre metadata files and cover
images, and it will not perform remote searches for book metadata.
### Searching
You can search the index from the command line, for example:
......
......@@ -3,6 +3,7 @@ package liber
import (
"os"
"path/filepath"
"strings"
)
type FileStorage struct {
......@@ -35,5 +36,8 @@ func (s *FileStorage) Create(path string) (*os.File, error) {
// Open opens the named file for reading. A path that starts with "/"
// is used exactly as given; any other path is resolved relative to the
// storage root.
func (s *FileStorage) Open(path string) (*os.File, error) {
	target := path
	if !strings.HasPrefix(path, "/") {
		target = filepath.Join(s.Root, path)
	}
	return os.Open(target)
}
......@@ -9,8 +9,6 @@
</head>
<body>
<script>var on_load = null;</script>
<div class="site-wrapper">
<div class="site-wrapper-inner">
<div class="cover-container">
......@@ -30,7 +28,7 @@
autocomplete="off" action="/search">
<input type="text" class="input-lg form-control"
id="search_field" data-provide="typeahead"
name="q" size="30">
name="q" size="30" autofocus>
<button type="submit" class="btn btn-lg btn-primary">
Cerca
</button>
......
......@@ -29,6 +29,11 @@ type Metadata struct {
Sources []MetadataSource
}
// Sufficient reports whether the metadata carries enough information to
// be usable. The only requirement checked is a non-empty title.
func (m *Metadata) Sufficient() bool {
	return len(m.Title) > 0
}
// Uniques returns the list of possible unique tokens for this book.
func (m *Metadata) Uniques() []string {
var out []string
......
package liber
import (
"encoding/xml"
"io"
"os"
"path/filepath"
"strings"
)
// opfIdentifier holds a single identifier element found in the OPF
// metadata section.
type opfIdentifier struct {
// Scheme is the "scheme" attribute in the OPF namespace; ToMetadata
// recognizes the values "ISBN" and "uuid".
Scheme string `xml:"http://www.idpf.org/2007/opf scheme,attr"`
// Value is the character data of the element, i.e. the identifier itself.
Value string `xml:",chardata"`
}
// opfMeta is the decoding target for the <metadata> element of an OPF
// document. Apart from XMLName, every field maps to an element in the
// Dublin Core namespace (http://purl.org/dc/elements/1.1/).
type opfMeta struct {
// XMLName ties this struct to the "metadata" element in the OPF namespace.
XMLName xml.Name `xml:"http://www.idpf.org/2007/opf metadata"`
// Elements decoded as a single string value.
Title string `xml:"http://purl.org/dc/elements/1.1/ title"`
Date string `xml:"http://purl.org/dc/elements/1.1/ date"`
Description string `xml:"http://purl.org/dc/elements/1.1/ description"`
// Elements that may repeat; each occurrence becomes one slice entry.
Creator []string `xml:"http://purl.org/dc/elements/1.1/ creator"`
Language []string `xml:"http://purl.org/dc/elements/1.1/ language"`
Publisher []string `xml:"http://purl.org/dc/elements/1.1/ publisher"`
Identifier []opfIdentifier `xml:"http://purl.org/dc/elements/1.1/ identifier"`
}
// opfPackage is the top-level decoding target for an OPF document. It
// declares no XMLName of its own and only exposes the metadata section.
type opfPackage struct {
// Meta is a pointer, so it remains nil when the decoder does not
// populate it. The field carries no tag of its own.
// NOTE(review): the element name is expected to come from
// opfMeta.XMLName - confirm against the encoding/xml rules.
Meta *opfMeta
}
// ToMetadata converts the decoded OPF metadata into a Metadata object.
//
// Title, Description, Publisher and Language are copied unchanged. Each
// creator element is split on commas into individual, whitespace-trimmed
// names; blank names (from an empty element or a stray/trailing comma)
// are skipped so that no empty author ends up in the result. Identifiers
// with scheme "ISBN" are collected into ISBN, and the value of the last
// "uuid" identifier becomes the ID of the single "opf" metadata source.
func (o *opfMeta) ToMetadata() *Metadata {
	m := &Metadata{
		Title:       o.Title,
		Description: o.Description,
		Publisher:   o.Publisher,
		Language:    o.Language,
	}

	// Skip the date when it is missing or equal to this fixed timestamp
	// (presumably the placeholder Calibre writes for an unset date -
	// TODO confirm). toYear is defined elsewhere in the package.
	if o.Date != "" && o.Date != "0101-01-01T00:00:00+00:00" {
		m.Date = toYear(o.Date)
	}

	// A single creator element may list several comma-separated authors.
	for _, c := range o.Creator {
		for _, cc := range strings.Split(c, ",") {
			if name := strings.TrimSpace(cc); name != "" {
				m.Creator = append(m.Creator, name)
			}
		}
	}

	var uuid string
	for _, id := range o.Identifier {
		if id.Scheme == "ISBN" {
			m.ISBN = append(m.ISBN, id.Value)
		} else if id.Scheme == "uuid" {
			uuid = id.Value
		}
	}

	// The source entry is always present; its ID is empty when the
	// document has no "uuid" identifier.
	m.Sources = []MetadataSource{{
		Name: "opf",
		ID:   uuid,
	}}
	return m
}
// opfParse decodes an OPF document from r and converts its metadata
// section into a Metadata object.
//
// It returns the decoder's error when the XML cannot be decoded, and an
// xml.UnmarshalError when the document decodes cleanly but contains no
// OPF metadata element.
func opfParse(r io.Reader) (*Metadata, error) {
	var opf opfPackage
	if err := xml.NewDecoder(r).Decode(&opf); err != nil {
		return nil, err
	}
	// Meta is a pointer that the decoder leaves nil when the element is
	// absent; calling ToMetadata on it would dereference nil and panic.
	if opf.Meta == nil {
		return nil, xml.UnmarshalError("opf: no metadata element found")
	}
	return opf.Meta.ToMetadata(), nil
}
// opfOpen opens the file at path, hands it to opfParse and returns the
// result. The file is closed before returning. An error from opening
// the file is returned as-is.
func opfOpen(path string) (*Metadata, error) {
	src, openErr := os.Open(path)
	if openErr != nil {
		return nil, openErr
	}
	defer src.Close()

	meta, parseErr := opfParse(src)
	return meta, parseErr
}
// opfMetadataPath returns the path of the "metadata.opf" file located in
// the same directory as epubPath.
func opfMetadataPath(epubPath string) string {
	const name = "metadata.opf"
	dir := filepath.Dir(epubPath)
	return filepath.Join(dir, name)
}
// opfCoverPath returns the path of the "cover.jpg" file located in the
// same directory as epubPath.
func opfCoverPath(epubPath string) string {
	const name = "cover.jpg"
	dir := filepath.Dir(epubPath)
	return filepath.Join(dir, name)
}
package liber
import (
"reflect"
"strings"
"testing"
)
// testOpf is a sample OPF document used as the input fixture for
// TestOpf_Parse. Besides the fields the parser extracts, it contains
// identifiers with other schemes ("calibre", "MOBI-ASIN") and elements
// with no counterpart in opfMeta (contributor, subject, meta, guide).
var testOpf = `<?xml version='1.0' encoding='utf-8'?>
<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="uuid_id">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
<dc:identifier opf:scheme="calibre" id="calibre_id">3</dc:identifier>
<dc:identifier opf:scheme="uuid" id="uuid_id">96e4a1ff-dd24-4966-8fe9-2d16a14b9fb0</dc:identifier>
<dc:title>Cypherpunks: Freedom and the Future of the Internet</dc:title>
<dc:creator opf:file-as="Julian Assange, Jacob Appelbaum, Andy Muller-Maguhn, Jeremie Zimmermann" opf:role="aut">Julian Assange, Jacob Appelbaum, Andy Muller-Maguhn, Jeremie Zimmermann</dc:creator>
<dc:contributor opf:file-as="calibre" opf:role="bkp">calibre (0.9.18) [http://calibre-ebook.com]</dc:contributor>
<dc:date>2012-11-25T23:00:00+00:00</dc:date>
<dc:description>&lt;div&gt;&lt;p class="description"&gt;The harassment of WikiLeaks and other Internet activists, together with attempts to introduce anti-file sharing legislation such as SOPA and ACTA, indicate that the politics of the Internet have reached a crossroads. In one direction lies a future that guarantees, in the watchwords of the cypherpunks, “privacy for the weak and transparency for the powerful”; in the other lies an Internet that allows government and large corporations to discover ever more about internet users while hiding their own activities. Assange and his co-discussants unpick the complex issues surrounding this crucial choice with clarity and engaging enthusiasm.&lt;/p&gt;&lt;p class="description"&gt;released by the CypherTeam&lt;/p&gt;&lt;/div&gt;</dc:description>
<dc:publisher>OR Books</dc:publisher>
<dc:identifier opf:scheme="MOBI-ASIN">6de78a57-3b52-45fe-9670-5621d44582d7</dc:identifier>
<dc:identifier opf:scheme="ISBN">9781939293015</dc:identifier>
<dc:language>fra</dc:language>
<dc:subject>Bisac Code 1: POL039000</dc:subject>
<meta content="{&quot;Julian Assange, Jacob Appelbaum, Andy Muller-Maguhn, Jeremie Zimmermann&quot;: &quot;&quot;}" name="calibre:author_link_map"/>
<meta content="2013-08-24T12:13:18+00:00" name="calibre:timestamp"/>
<meta content="Cypherpunks: Freedom and the Future of the Internet" name="calibre:title_sort"/>
</metadata>
<guide>
<reference href="cover.jpg" title="Cover" type="cover"/>
</guide>
</package>
`
func TestOpf_Parse(t *testing.T) {
result, err := opfParse(strings.NewReader(testOpf))
if err != nil {
t.Fatal(err)
}
expected := &Metadata{
Title: "Cypherpunks: Freedom and the Future of the Internet",
Date: "2012",
Description: "<div><p class=\"description\">The harassment of WikiLeaks and other Internet activists, together with attempts to introduce anti-file sharing legislation such as SOPA and ACTA, indicate that the politics of the Internet have reached a crossroads. In one direction lies a future that guarantees, in the watchwords of the cypherpunks, “privacy for the weak and transparency for the powerful”; in the other lies an Internet that allows government and large corporations to discover ever more about internet users while hiding their own activities. Assange and his co-discussants unpick the complex issues surrounding this crucial choice with clarity and engaging enthusiasm.</p><p class=\"description\">released by the CypherTeam</p></div>",
ISBN: []string{"9781939293015"},
Creator: []string{"Julian Assange", "Jacob Appelbaum", "Andy Muller-Maguhn", "Jeremie Zimmermann"},
Publisher: []string{"OR Books"},
Language: []string{"fra"},
Sources: []MetadataSource{{
Name: "opf",
ID: "96e4a1ff-dd24-4966-8fe9-2d16a14b9fb0",
}},
}
if !reflect.DeepEqual(result, expected) {
t.Fatalf("Result does not match: expected=%#v, got=%#v", expected, result)
}
}
package liber
import (
"errors"
"log"
"os"
"sync"
......@@ -66,6 +67,8 @@ func differ(db *Database, basedir string) chan fileData {
outCh := make(chan fileData, 100)
var wg sync.WaitGroup
wg.Add(2)
// Start two sources in parallel and send their output to fileCh.
go func() {
localFileScanner(db, basedir, fileCh)
wg.Done()
......@@ -74,22 +77,27 @@ func differ(db *Database, basedir string) chan fileData {
dbFileScanner(db, fileCh)
wg.Done()
}()
// Once they are done, close the channel.
go func() {
wg.Wait()
close(fileCh)
}()
go func() {
// Yeah we'll end up more or less keeping the entire
// db in memory in order to detect which files went
// away...
// Detect files that have not changed, i.e. appear in
// the database and the filesystem. Keep track of book
// IDs so that once all entries have been processed we
// can delete those books from the database where the
// original file has been removed.
allSources := SourceDB | SourceFS
tmp := make(map[string]int)
ids := make(map[string]BookId)
for f := range fileCh {
log.Printf("differ: %#v", f)
// log.Printf("differ: %#v", f)
tmp[f.path] |= f.source
// Delete entries as soon as we've seen them
// from both sources.
if tmp[f.path] == allSources {
log.Printf("differ: dropping %s", f.path)
// log.Printf("differ: dropping %s", f.path)
delete(tmp, f.path)
delete(ids, f.path)
}
......@@ -121,9 +129,9 @@ func adder(db *Database, chooser MetadataChooserFunc, fileCh chan fileData) {
}
var err error
f.id, err = addNewBook(db, f, oldid, chooser)
f.id, err = importBook(db, f, oldid, chooser)
if err != nil {
log.Printf("Could not add %s: % v", f.path, err)
log.Printf("Could not add %s: %v", f.path, err)
continue
}
......@@ -134,32 +142,50 @@ func adder(db *Database, chooser MetadataChooserFunc, fileCh chan fileData) {
}
}
func addNewBook(db *Database, f fileData, oldid BookId, chooser MetadataChooserFunc) (BookId, error) {
func importBook(db *Database, f fileData, oldid BookId, chooser MetadataChooserFunc) (BookId, error) {
// Attempt direct metadata extraction.
book, err := Parse(f.path)
if err != nil {
return 0, err
}
candidates, err := LookupGoogleBooks(book.Metadata)
if err == nil && len(candidates) > 0 {
if len(candidates) == 1 {
log.Printf("found Google Books match: %s", candidates[0].String())
book.Metadata.Merge(candidates[0])
} else {
if userchoice := chooser(f.path, candidates); userchoice != nil {
book.Metadata.Merge(userchoice)
}
}
}
if oldid != 0 {
book.Id = oldid
} else {
book.Id = NewID()
}
// Try to find a cover image.
if imageData, err := GetGoogleBooksCover(book.Metadata); err == nil {
// Check if a Calibre OPF file exists.
if opfmeta, err := opfOpen(opfMetadataPath(f.path)); err == nil {
book.Metadata.Merge(opfmeta)
} else {
// No local metadata, use Google Books to retrieve
// more information on the book. Ask the user to
// choose in case there are multiple results.
candidates, err := LookupGoogleBooks(book.Metadata)
if err == nil && len(candidates) > 0 {
if len(candidates) == 1 {
log.Printf("found Google Books match: %s", candidates[0].String())
book.Metadata.Merge(candidates[0])
} else {
if userchoice := chooser(f.path, candidates); userchoice != nil {
book.Metadata.Merge(userchoice)
}
}
}
}
// Check if the book metadata looks ok.
if !book.Metadata.Sufficient() {
return 0, errors.New("insufficient metadata")
}
// Try to find a cover image. Look on the local filesystem
// first, otherwise check Google Books.
localCoverPath := opfCoverPath(f.path)
if _, err := os.Stat(localCoverPath); err == nil {
book.CoverPath = localCoverPath
} else if imageData, err := GetGoogleBooksCover(book.Metadata); err == nil {
imageFileName := book.Path + ".cover.png"
if imgf, err := os.Create(imageFileName); err != nil {
log.Printf("Could not save cover image for %d: %v", book.Id, err)
......@@ -170,6 +196,7 @@ func addNewBook(db *Database, f fileData, oldid BookId, chooser MetadataChooserF
}
}
// Save the book in our database.
if err := db.PutBook(book); err != nil {
return book.Id, err
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment