Skip to content
Snippets Groups Projects
Commit 3599ba3c authored by ale's avatar ale
Browse files

Calibre integration

parent 7426f2ec
No related branches found
No related tags found
No related merge requests found
......@@ -84,6 +84,13 @@ You can run `liber --update` as many times as you like (for example
whenever you add an ebook to your collection), it will automatically
detect new files and files that have been removed.
#### Integration with Calibre
If you use Calibre to maintain your ebook collection, you can index it
with `liber` by simply pointing its `--book-dir` option at the Calibre
library directory. `liber` will read Calibre metadata files and cover
images, and it will not perform remote searches for book metadata.
### Searching
You can search the index from the command line, for example:
......
......@@ -3,6 +3,7 @@ package liber
import (
"os"
"path/filepath"
"strings"
)
type FileStorage struct {
......@@ -35,5 +36,8 @@ func (s *FileStorage) Create(path string) (*os.File, error) {
// Open opens the file at path for reading.
//
// Absolute paths are opened exactly as given; every other path is
// resolved relative to the storage root (s.Root). A path counts as
// absolute when it starts with "/" or when the host platform considers it
// absolute (filepath.IsAbs), so that e.g. drive-letter paths are not
// mistakenly joined onto the root.
func (s *FileStorage) Open(path string) (*os.File, error) {
	if strings.HasPrefix(path, "/") || filepath.IsAbs(path) {
		return os.Open(path)
	}
	return os.Open(filepath.Join(s.Root, path))
}
......@@ -9,8 +9,6 @@
</head>
<body>
<script>var on_load = null;</script>
<div class="site-wrapper">
<div class="site-wrapper-inner">
<div class="cover-container">
......@@ -30,7 +28,7 @@
autocomplete="off" action="/search">
<input type="text" class="input-lg form-control"
id="search_field" data-provide="typeahead"
name="q" size="30">
name="q" size="30" autofocus>
<button type="submit" class="btn btn-lg btn-primary">
Cerca
</button>
......
......@@ -29,6 +29,11 @@ type Metadata struct {
Sources []MetadataSource
}
// Sufficient reports whether the metadata holds the minimum amount of
// information required for a book, which currently means a non-empty
// title.
func (m *Metadata) Sufficient() bool {
	hasTitle := len(m.Title) > 0
	return hasTitle
}
// Uniques returns the list of possible unique tokens for this book.
func (m *Metadata) Uniques() []string {
var out []string
......
opf.go 0 → 100644
package liber
import (
"encoding/xml"
"io"
"os"
"path/filepath"
"strings"
)
// opfIdentifier is the decoding target for a single Dublin Core
// identifier element of an OPF document. Scheme is read from the
// element's "scheme" attribute in the OPF namespace and names the kind of
// identifier; Value is the element's character data.
type opfIdentifier struct {
Scheme string `xml:"http://www.idpf.org/2007/opf scheme,attr"`
Value string `xml:",chardata"`
}
// opfMeta is the decoding target for the <metadata> element (OPF
// namespace) of an OPF document. Every other field maps to a child
// element in the Dublin Core namespace; the slice-typed fields collect
// all occurrences of an element that may be repeated.
type opfMeta struct {
XMLName xml.Name `xml:"http://www.idpf.org/2007/opf metadata"`
Title string `xml:"http://purl.org/dc/elements/1.1/ title"`
Date string `xml:"http://purl.org/dc/elements/1.1/ date"`
Description string `xml:"http://purl.org/dc/elements/1.1/ description"`
Creator []string `xml:"http://purl.org/dc/elements/1.1/ creator"`
Language []string `xml:"http://purl.org/dc/elements/1.1/ language"`
Publisher []string `xml:"http://purl.org/dc/elements/1.1/ publisher"`
Identifier []opfIdentifier `xml:"http://purl.org/dc/elements/1.1/ identifier"`
}
// opfPackage is the decoding target for the root element of an OPF
// document. Meta carries no tag of its own: encoding/xml takes the
// element name to match from the XMLName field of opfMeta. Meta is left
// nil when the document contains no such element.
type opfPackage struct {
Meta *opfMeta
}
// ToMetadata converts the decoded OPF metadata into a Metadata object.
//
// Title, description, publishers and languages are copied verbatim. The
// date is reduced to a year with toYear. Each creator element is split on
// commas into individual names; blank names (from trailing commas or
// empty elements) are dropped. Identifiers whose scheme is "ISBN" are
// collected into ISBN, and the identifier whose scheme is "uuid" becomes
// the ID of the single "opf" metadata source. Schemes are compared
// case-insensitively and identifier values are trimmed of surrounding
// whitespace; identifiers without a value are ignored.
func (o *opfMeta) ToMetadata() *Metadata {
	m := &Metadata{
		Title:       o.Title,
		Description: o.Description,
		Publisher:   o.Publisher,
		Language:    o.Language,
	}

	// The year-101 timestamp is skipped along with the empty string --
	// presumably it is Calibre's placeholder for an unknown publication
	// date (TODO confirm).
	if o.Date != "" && o.Date != "0101-01-01T00:00:00+00:00" {
		m.Date = toYear(o.Date)
	}

	for _, c := range o.Creator {
		for _, name := range strings.Split(c, ",") {
			if name = strings.TrimSpace(name); name != "" {
				m.Creator = append(m.Creator, name)
			}
		}
	}

	var uuid string
	for _, id := range o.Identifier {
		value := strings.TrimSpace(id.Value)
		switch {
		case value == "":
			// Nothing useful to record.
		case strings.EqualFold(id.Scheme, "ISBN"):
			m.ISBN = append(m.ISBN, value)
		case strings.EqualFold(id.Scheme, "uuid"):
			uuid = value
		}
	}

	m.Sources = []MetadataSource{{
		Name: "opf",
		ID:   uuid,
	}}
	return m
}
// opfParse decodes an OPF document from r and converts its metadata
// section into a Metadata object.
//
// It returns an error when the input cannot be decoded as XML, or when
// the document contains no metadata element to convert.
func opfParse(r io.Reader) (*Metadata, error) {
	var opf opfPackage
	if err := xml.NewDecoder(r).Decode(&opf); err != nil {
		return nil, err
	}
	// Meta is a pointer that the decoder only fills in when it meets the
	// metadata element; without this check a document lacking one would
	// cause a nil pointer dereference in ToMetadata.
	if opf.Meta == nil {
		return nil, xml.UnmarshalError("opf: no metadata element found")
	}
	return opf.Meta.ToMetadata(), nil
}
// opfOpen reads and parses the OPF file found at path. The file is closed
// again before the function returns. Errors from opening the file and
// from parsing it are passed back unchanged.
func opfOpen(path string) (*Metadata, error) {
	f, openErr := os.Open(path)
	if openErr != nil {
		return nil, openErr
	}
	defer f.Close()

	return opfParse(f)
}
// opfMetadataPath returns the path of the "metadata.opf" file located in
// the same directory as the ebook at epubPath.
func opfMetadataPath(epubPath string) string {
	bookDir := filepath.Dir(epubPath)
	return filepath.Join(bookDir, "metadata.opf")
}
// opfCoverPath returns the path of the "cover.jpg" file located in the
// same directory as the ebook at epubPath.
func opfCoverPath(epubPath string) string {
	bookDir := filepath.Dir(epubPath)
	return filepath.Join(bookDir, "cover.jpg")
}
package liber
import (
"reflect"
"strings"
"testing"
)
// testOpf is a sample OPF metadata document, as written by Calibre
// 0.9.18 according to its own contributor element, used as input for the
// parser test. Besides the ISBN and uuid identifiers it contains
// identifiers with other schemes ("calibre", "MOBI-ASIN").
var testOpf = `<?xml version='1.0' encoding='utf-8'?>
<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="uuid_id">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
<dc:identifier opf:scheme="calibre" id="calibre_id">3</dc:identifier>
<dc:identifier opf:scheme="uuid" id="uuid_id">96e4a1ff-dd24-4966-8fe9-2d16a14b9fb0</dc:identifier>
<dc:title>Cypherpunks: Freedom and the Future of the Internet</dc:title>
<dc:creator opf:file-as="Julian Assange, Jacob Appelbaum, Andy Muller-Maguhn, Jeremie Zimmermann" opf:role="aut">Julian Assange, Jacob Appelbaum, Andy Muller-Maguhn, Jeremie Zimmermann</dc:creator>
<dc:contributor opf:file-as="calibre" opf:role="bkp">calibre (0.9.18) [http://calibre-ebook.com]</dc:contributor>
<dc:date>2012-11-25T23:00:00+00:00</dc:date>
<dc:description>&lt;div&gt;&lt;p class="description"&gt;The harassment of WikiLeaks and other Internet activists, together with attempts to introduce anti-file sharing legislation such as SOPA and ACTA, indicate that the politics of the Internet have reached a crossroads. In one direction lies a future that guarantees, in the watchwords of the cypherpunks, “privacy for the weak and transparency for the powerful”; in the other lies an Internet that allows government and large corporations to discover ever more about internet users while hiding their own activities. Assange and his co-discussants unpick the complex issues surrounding this crucial choice with clarity and engaging enthusiasm.&lt;/p&gt;&lt;p class="description"&gt;released by the CypherTeam&lt;/p&gt;&lt;/div&gt;</dc:description>
<dc:publisher>OR Books</dc:publisher>
<dc:identifier opf:scheme="MOBI-ASIN">6de78a57-3b52-45fe-9670-5621d44582d7</dc:identifier>
<dc:identifier opf:scheme="ISBN">9781939293015</dc:identifier>
<dc:language>fra</dc:language>
<dc:subject>Bisac Code 1: POL039000</dc:subject>
<meta content="{&quot;Julian Assange, Jacob Appelbaum, Andy Muller-Maguhn, Jeremie Zimmermann&quot;: &quot;&quot;}" name="calibre:author_link_map"/>
<meta content="2013-08-24T12:13:18+00:00" name="calibre:timestamp"/>
<meta content="Cypherpunks: Freedom and the Future of the Internet" name="calibre:title_sort"/>
</metadata>
<guide>
<reference href="cover.jpg" title="Cover" type="cover"/>
</guide>
</package>
`
func TestOpf_Parse(t *testing.T) {
result, err := opfParse(strings.NewReader(testOpf))
if err != nil {
t.Fatal(err)
}
expected := &Metadata{
Title: "Cypherpunks: Freedom and the Future of the Internet",
Date: "2012",
Description: "<div><p class=\"description\">The harassment of WikiLeaks and other Internet activists, together with attempts to introduce anti-file sharing legislation such as SOPA and ACTA, indicate that the politics of the Internet have reached a crossroads. In one direction lies a future that guarantees, in the watchwords of the cypherpunks, “privacy for the weak and transparency for the powerful”; in the other lies an Internet that allows government and large corporations to discover ever more about internet users while hiding their own activities. Assange and his co-discussants unpick the complex issues surrounding this crucial choice with clarity and engaging enthusiasm.</p><p class=\"description\">released by the CypherTeam</p></div>",
ISBN: []string{"9781939293015"},
Creator: []string{"Julian Assange", "Jacob Appelbaum", "Andy Muller-Maguhn", "Jeremie Zimmermann"},
Publisher: []string{"OR Books"},
Language: []string{"fra"},
Sources: []MetadataSource{{
Name: "opf",
ID: "96e4a1ff-dd24-4966-8fe9-2d16a14b9fb0",
}},
}
if !reflect.DeepEqual(result, expected) {
t.Fatalf("Result does not match: expected=%#v, got=%#v", expected, result)
}
}
package liber
import (
"errors"
"log"
"os"
"sync"
......@@ -66,6 +67,8 @@ func differ(db *Database, basedir string) chan fileData {
outCh := make(chan fileData, 100)
var wg sync.WaitGroup
wg.Add(2)
// Start two sources in parallel and send their output to fileCh.
go func() {
localFileScanner(db, basedir, fileCh)
wg.Done()
......@@ -74,22 +77,27 @@ func differ(db *Database, basedir string) chan fileData {
dbFileScanner(db, fileCh)
wg.Done()
}()
// Once they are done, close the channel.
go func() {
wg.Wait()
close(fileCh)
}()
go func() {
// Yeah we'll end up more or less keeping the entire
// db in memory in order to detect which files went
// away...
// Detect files that have not changed, i.e. appear in
// the database and the filesystem. Keep track of book
// IDs so that once all entries have been processed we
// can delete those books from the database where the
// original file has been removed.
allSources := SourceDB | SourceFS
tmp := make(map[string]int)
ids := make(map[string]BookId)
for f := range fileCh {
log.Printf("differ: %#v", f)
// log.Printf("differ: %#v", f)
tmp[f.path] |= f.source
// Delete entries as soon as we've seen them
// from both sources.
if tmp[f.path] == allSources {
log.Printf("differ: dropping %s", f.path)
// log.Printf("differ: dropping %s", f.path)
delete(tmp, f.path)
delete(ids, f.path)
}
......@@ -121,9 +129,9 @@ func adder(db *Database, chooser MetadataChooserFunc, fileCh chan fileData) {
}
var err error
f.id, err = addNewBook(db, f, oldid, chooser)
f.id, err = importBook(db, f, oldid, chooser)
if err != nil {
log.Printf("Could not add %s: % v", f.path, err)
log.Printf("Could not add %s: %v", f.path, err)
continue
}
......@@ -134,32 +142,50 @@ func adder(db *Database, chooser MetadataChooserFunc, fileCh chan fileData) {
}
}
func addNewBook(db *Database, f fileData, oldid BookId, chooser MetadataChooserFunc) (BookId, error) {
func importBook(db *Database, f fileData, oldid BookId, chooser MetadataChooserFunc) (BookId, error) {
// Attempt direct metadata extraction.
book, err := Parse(f.path)
if err != nil {
return 0, err
}
candidates, err := LookupGoogleBooks(book.Metadata)
if err == nil && len(candidates) > 0 {
if len(candidates) == 1 {
log.Printf("found Google Books match: %s", candidates[0].String())
book.Metadata.Merge(candidates[0])
} else {
if userchoice := chooser(f.path, candidates); userchoice != nil {
book.Metadata.Merge(userchoice)
}
}
}
if oldid != 0 {
book.Id = oldid
} else {
book.Id = NewID()
}
// Try to find a cover image.
if imageData, err := GetGoogleBooksCover(book.Metadata); err == nil {
// Check if a Calibre OPF file exists.
if opfmeta, err := opfOpen(opfMetadataPath(f.path)); err == nil {
book.Metadata.Merge(opfmeta)
} else {
// No local metadata, use Google Books to retrieve
// more information on the book. Ask the user to
// choose in case there are multiple results.
candidates, err := LookupGoogleBooks(book.Metadata)
if err == nil && len(candidates) > 0 {
if len(candidates) == 1 {
log.Printf("found Google Books match: %s", candidates[0].String())
book.Metadata.Merge(candidates[0])
} else {
if userchoice := chooser(f.path, candidates); userchoice != nil {
book.Metadata.Merge(userchoice)
}
}
}
}
// Check if the book metadata looks ok.
if !book.Metadata.Sufficient() {
return 0, errors.New("insufficient metadata")
}
// Try to find a cover image. Look on the local filesystem
// first, otherwise check Google Books.
localCoverPath := opfCoverPath(f.path)
if _, err := os.Stat(localCoverPath); err == nil {
book.CoverPath = localCoverPath
} else if imageData, err := GetGoogleBooksCover(book.Metadata); err == nil {
imageFileName := book.Path + ".cover.png"
if imgf, err := os.Create(imageFileName); err != nil {
log.Printf("Could not save cover image for %d: %v", book.Id, err)
......@@ -170,6 +196,7 @@ func addNewBook(db *Database, f fileData, oldid BookId, chooser MetadataChooserF
}
}
// Save the book in our database.
if err := db.PutBook(book); err != nil {
return book.Id, err
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment