Commit 7426f2ec authored by ale's avatar ale

initial commit

parents
Copyright (c) 2014 <ale@incal.net>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
liber
=====
Software to manage an ebook collection. It supports EPUB, MOBI and PDF
formats, and it will fetch metadata and cover images from external
sources (currently Google Books). It offers powerful search
functionality over an HTTP interface.
Another useful feature is the ability to synchronize remote
collections (currently in one direction only), making it possible for
a group of people to manage a centralized ebook repository.
## Installation
Binaries are available in the form of Debian packages. They will take
care of installing all the necessary dependencies. If this isn't an
option, you can build the software from source.
### Debian packages
Add this line to your `sources.list`:
deb http://www.incal.net/ale/debian/liber/ .
Then make sure the GPG key used to sign the repository is installed,
update the package sources, and install the `liber` package:
$ wget -O- http://www.incal.net/ale/debian/repo.key \
| sudo apt-key add -
$ sudo apt-get update
$ sudo apt-get install liber
### Build from source
The source has a few dependencies:
* A working [Go](http://golang.org/) environment (note that the Go
version in Debian wheezy is too old; you should install a more recent
version from the Go website);
* LevelDB (and Snappy)
Some of the Go packages that `liber` depends upon have their own list
of dependencies ([Bleve](http://blevesearch.org/) in particular) which
you will also need to install. On a Debian-based system, the following
command should suffice:
$ sudo apt-get install build-essential libleveldb-dev \
libsnappy-dev libicu-dev libstemmer-dev
You should then install Bleve, enabling some necessary features:
$ go get -u -tags 'leveldb icu libstemmer' github.com/blevesearch/bleve
And then download and install the `liber` source code:
$ go get -d git.autistici.org/ale/liber
$ go install git.autistici.org/ale/liber/...
This will place the resulting `liber` executable in `$GOPATH/bin`.
## Usage
`liber` will store its database in a local directory, `~/.liber` by
default. Use the `--db-dir` option if you'd like to change this.
If you want to manage a local collection, the assumption is that you
are storing all your ebooks in a single place (below a single
directory, possibly organized into further subdirectories). You can
control this with the (mandatory) option `--book-dir`.
### Indexing a local ebook collection
To index a local ebook collection, run the following command:
$ liber --book-dir=/path/to/ebooks --update
The tool will attempt to identify books on Google Books. It is
possible that more than one match is found, in which case `liber` will
open a dialog box to ask you interactively to pick the right match.
You can run `liber --update` as many times as you like (for example
whenever you add an ebook to your collection); it will automatically
detect new files and files that have been removed.
### Searching
You can search the index from the command line, for example:
$ liber --search "Das Kapital"
This will print a list of documents that match the query. For the full
query syntax, see
[the Bleve documentation](https://github.com/blevesearch/bleve/wiki/Query-String-Query).
### Synchronizing with a remote server
To upload the contents of the local database (including the file
contents themselves) to a remote `liber` server, run the following
command:
$ liber --sync=http://remote.server.address/
### Running the HTTP interface
The HTTP interface can be started with:
$ liber --http-server=:3000 --book-dir=/path/to/ebooks
This will start an HTTP server on port 3000, listening on all
interfaces. The HTTP server needs some templates and static content
which the Debian package installs in `/usr/share/liber/htdocs`.
The HTTP server will store uploaded files into the directory specified
by `--book-dir`. You should use the same value that you passed to
`liber --update`.
package main
import (
"bufio"
"bytes"
"errors"
"flag"
"fmt"
"log"
"os"
"os/exec"
"os/user"
"path/filepath"
"strconv"
"strings"
"sync"
"git.autistici.org/ale/liber"
)
// Command-line flags. main requires exactly one of -update, -search,
// -sync and -http-server to be given.
var (
// Database directory; main passes the value through expandTilde.
databaseDir = flag.String("db-dir", "~/.liber", "database directory")
// Directory holding the ebooks; main requires it together with -update,
// and doHttpServer uses it as the file storage root.
bookDir = flag.String("book-dir", "", "books directory")
// Mode: update the database from the books directory.
update = flag.Bool("update", false, "update the db")
// Mode: search; the query is taken from the remaining command-line arguments.
search = flag.Bool("search", false, "search something")
// Mode: synchronize with the remote server at this address (enabled when non-empty).
remotesync = flag.String("sync", "", "push data to remote server")
// Mode: serve HTTP on this address (enabled when non-empty).
httpserver = flag.String("http-server", "", "start the HTTP server on the specified address")
)
// Various ways to ask a user to choose something.

// promptUserStdin prompts the user on stdin to pick one of choices for
// the file at path. Kind of annoying because it interferes with
// logging on stderr. It is used as a fallback.
//
// The choices are printed as a 1-based numbered list. The function
// returns the selected entry, or (nil, nil) when the user skips by
// entering an empty line or when stdin cannot be read any further.
// Input that is not a number in range prints a hint and prompts again.
// The returned error is always nil.
func promptUserStdin(path string, choices []*liber.Metadata) (*liber.Metadata, error) {
	fmt.Printf("\n[*] Possible matches for %s:\n\n", path)
	for idx, md := range choices {
		fmt.Printf(" %d) %s\n", idx+1, md.String())
	}

	const prompt = "Pick one, or Enter to skip: "
	rdr := bufio.NewReader(os.Stdin)
	for {
		// Print, not Printf: the prompt is data, not a format string.
		fmt.Print(prompt)
		os.Stdout.Sync()

		line, err := rdr.ReadString('\n')
		// ReadString keeps the trailing newline, so a bare Enter arrives
		// as "\n"; the emptiness test must look at the trimmed text.
		answer := strings.TrimSpace(line)
		if err != nil || answer == "" {
			return nil, nil
		}

		idx, err := strconv.Atoi(answer)
		if err != nil {
			fmt.Printf("%v\n", err)
			continue
		}
		if idx < 1 || idx > len(choices) {
			fmt.Printf("Insert a number between 1 and %d.\n", len(choices))
			continue
		}
		return choices[idx-1], nil
	}
}
// findProgram returns the path of the first entry of progs that
// exec.LookPath is able to resolve, trying them in order. If none of
// them resolves, it returns an empty path and a "not found" error.
func findProgram(progs []string) (string, error) {
	for i := range progs {
		resolved, err := exec.LookPath(progs[i])
		if err != nil {
			continue
		}
		return resolved, nil
	}
	return "", errors.New("not found")
}
// Cached outcome of the dialog program lookup. dialogProgInit is set
// once the lookup has been attempted, so it runs at most one time;
// dialogProg stays empty when no program was found.
var (
	dialogProg     string
	dialogProgInit bool
)

// getDialogProg returns the path of the dialog program to use. The
// candidates are "whiptail" and "dialog", preceded by "gdialog" and
// "xdialog" when the DISPLAY environment variable is set. The lookup is
// performed on the first call only and its result is reused afterwards.
// A "not found" error is returned when no candidate is available.
func getDialogProg() (string, error) {
	if !dialogProgInit {
		dialogProgInit = true

		candidates := []string{"whiptail", "dialog"}
		if os.Getenv("DISPLAY") != "" {
			candidates = []string{"gdialog", "xdialog", "whiptail", "dialog"}
		}
		found, err := findProgram(candidates)
		if err == nil {
			dialogProg = found
		}
	}

	if dialogProg != "" {
		return dialogProg, nil
	}
	return "", errors.New("not found")
}
// promptUserDialog prompts the user with 'dialog', or a graphical
// variant if X11 is detected (see getDialogProg).
//
// The program is run with a --menu listing every entry of choices
// under a 1-based numeric tag. Its stdin and stdout are connected to
// the terminal, while its stderr is captured and parsed as the selected
// tag.
//
// It returns an error when no dialog program is available or when the
// program exits unsuccessfully. It returns (nil, nil) when the captured
// output is not a number that identifies one of the choices.
func promptUserDialog(path string, choices []*liber.Metadata) (*liber.Metadata, error) {
	dialog, err := getDialogProg()
	if err != nil {
		return nil, err
	}

	args := make([]string, 0, 7+2*len(choices))
	args = append(args,
		"--title", "Metadata Chooser",
		"--menu", fmt.Sprintf("Possible matches for %s:", path),
		"0", "0", "0",
	)
	for idx, md := range choices {
		args = append(args, strconv.Itoa(idx+1), md.String())
	}
	log.Printf("running: %s %v", dialog, args)

	var output bytes.Buffer
	cmd := exec.Command(dialog, args...)
	cmd.Stdin = os.Stdin
	cmd.Stdout = os.Stdout
	cmd.Stderr = &output
	if err := cmd.Run(); err != nil {
		log.Printf("dialog failed: %v", err)
		return nil, err
	}

	result, err := strconv.Atoi(strings.TrimSpace(output.String()))
	// Guard the index: anything outside 1..len(choices) would otherwise
	// panic when used to index the slice.
	if err != nil || result < 1 || result > len(choices) {
		return nil, nil
	}
	return choices[result-1], nil
}
// promptMutex serializes calls to promptUser, so that only one prompt
// is shown at a time.
var promptMutex sync.Mutex

// promptUser asks the user to choose one of choices for the file at
// path. It tries promptUserDialog first and falls back to
// promptUserStdin when that fails. It returns nil when nothing was
// selected or when both prompts failed.
func promptUser(path string, choices []*liber.Metadata) *liber.Metadata {
	promptMutex.Lock()
	defer promptMutex.Unlock()

	if picked, err := promptUserDialog(path, choices); err == nil {
		return picked
	}

	picked, err := promptUserStdin(path, choices)
	if err != nil {
		return nil
	}
	return picked
}
// doUpdate runs db.Update on the directory dir, passing promptUser as
// the callback argument.
func doUpdate(db *liber.Database, dir string) {
db.Update(dir, promptUser)
}
// doSync synchronizes db with the remote server at remoteAddr. Any
// error reported by the synchronization terminates the program.
func doSync(db *liber.Database, remoteAddr string) {
	remote := liber.NewRemoteServer(remoteAddr)
	err := db.Sync(remote)
	if err != nil {
		log.Fatal(err)
	}
}
// doSearch runs query against db, asking for the first 100 results,
// and prints them to standard output: the total number of results,
// then one numbered entry per book with its metadata and its path.
// A search error terminates the program.
func doSearch(db *liber.Database, query string) {
	found, err := db.Search(query, 0, 100)
	if err != nil {
		log.Fatal(err)
	}

	if found.NumResults == 0 {
		fmt.Print("No results.\n")
		return
	}

	fmt.Printf("%d results found:\n\n", found.NumResults)
	for n, book := range found.Results {
		fmt.Printf("%d) %s\n", n+1, book.Metadata.String())
		fmt.Printf(" %s\n", book.Path)
	}
}
// doHttpServer serves db over HTTP on addr, using a file storage
// rooted at the --book-dir directory with a nesting level of 2. It
// does not return: the error from ListenAndServe is passed to
// log.Fatal.
func doHttpServer(db *liber.Database, addr string) {
	srv := liber.NewHttpServer(db, &liber.FileStorage{Root: *bookDir, Nesting: 2}, addr)
	err := srv.ListenAndServe()
	log.Fatal(err)
}
// b2i converts a boolean to an integer: 1 for true, 0 for false.
func b2i(b bool) int {
	n := 0
	if b {
		n = 1
	}
	return n
}
// expandTilde replaces a leading "~/" in path with the home directory
// of the current user. Any other path, including the empty string and
// a bare "~", is returned unchanged.
//
// The home directory comes from user.Current, falling back to the HOME
// environment variable when the user lookup fails. If neither is
// available the path is returned unchanged.
func expandTilde(path string) string {
	// HasPrefix is safe on inputs shorter than the prefix, unlike
	// slicing path[:2] directly.
	if !strings.HasPrefix(path, "~/") {
		return path
	}

	home := ""
	if curUser, err := user.Current(); err == nil {
		home = curUser.HomeDir
	}
	if home == "" {
		home = os.Getenv("HOME")
	}
	if home == "" {
		return path
	}
	return filepath.Join(home, path[2:])
}
// main parses the command line, opens the database and runs the one
// operating mode that was requested: --update, --sync, --search or
// --http-server. Exactly one mode must be selected.
func main() {
	log.SetFlags(0)
	flag.Parse()

	// Count the selected modes; exactly one is allowed.
	modes := b2i(*update) + b2i(*search) + b2i(*httpserver != "") + b2i(*remotesync != "")
	if modes != 1 {
		log.Fatal("Must specify one of --update, --sync, --search or --http-server")
	}
	if *update && *bookDir == "" {
		log.Fatal("Must specify --book-dir with --update")
	}

	dbdir := expandTilde(*databaseDir)
	db, err := liber.NewDb(dbdir)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	switch {
	case *update:
		// Redirect logging to dbdir/update.log. If the file cannot be
		// opened, logging is left as it is.
		logPath := filepath.Join(dbdir, "update.log")
		if logf, err := os.OpenFile(logPath, os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0644); err == nil {
			defer logf.Close()
			log.SetOutput(logf)
		}
		doUpdate(db, *bookDir)

	case *remotesync != "":
		doSync(db, *remotesync)

	case *search:
		// The query is made of all the non-flag arguments.
		query := strings.Join(flag.Args(), " ")
		if query == "" {
			log.Fatal("No query specified")
		}
		doSearch(db, query)

	case *httpserver != "":
		doHttpServer(db, *httpserver)
	}
}
package liber
import (
"bytes"
"encoding/binary"
"encoding/json"
"errors"
"math/rand"
"os"
"path/filepath"
"strconv"
"time"
"github.com/blevesearch/bleve"
"github.com/jmhodges/levigo"
)
// Key-space layout of the LevelDB database: every key is a bucket name
// followed by keySeparator and the per-bucket key (see bktToKey).
var (
// BookBucket is the bucket prefix used for Book records.
BookBucket = []byte("ebook")
// FileBucket is the bucket prefix used for File records.
FileBucket = []byte("file")
// keySeparator is the byte placed between the bucket name and the key.
keySeparator = byte('/')
)
// BookId is the numeric identifier of a book.
type BookId uint64

// String returns the identifier in decimal notation.
func (id BookId) String() string {
	return strconv.FormatUint(uint64(id), 10)
}

// Key returns the identifier encoded as 8 little-endian bytes, the
// form used as the key of a book within its bucket.
func (id BookId) Key() []byte {
	// Encode directly into a fixed-size buffer: unlike binary.Write this
	// involves no reflection and has no error to be ignored.
	key := make([]byte, 8)
	binary.LittleEndian.PutUint64(key, uint64(id))
	return key
}

// NewID returns a new pseudo-random identifier, a non-negative 63-bit
// value taken from the global math/rand source.
//
// NOTE(review): with Go versions before 1.20 the global source yields
// the same sequence on every run unless it is seeded - confirm that
// rand.Seed is called somewhere, otherwise identifiers will repeat
// across runs.
func NewID() BookId {
	return BookId(rand.Int63())
}

// ParseID parses a decimal identifier as produced by BookId.String.
// The parse error is discarded: malformed input yields 0.
func ParseID(s string) BookId {
	id, _ := strconv.ParseUint(s, 10, 64)
	return BookId(id)
}
// metadataDocumentMapping builds the bleve document mapping used for
// book metadata. None of the mapped fields is stored in the index:
//
//   - Title is analyzed with the "en" analyzer;
//   - Creator uses the default text field settings;
//   - Description and ISBN share a single field mapping that is
//     excluded from the composite "_all" field;
//   - Sources, Date, Publisher, Format, Keywords and Language are
//     mapped as disabled sub-documents.
func metadataDocumentMapping() *bleve.DocumentMapping {
	mapping := bleve.NewDocumentMapping()

	title := bleve.NewTextFieldMapping()
	title.Analyzer = "en"
	title.Store = false
	mapping.AddFieldMappingsAt("Title", title)

	creator := bleve.NewTextFieldMapping()
	creator.Store = false
	mapping.AddFieldMappingsAt("Creator", creator)

	// One mapping instance serves both of the fields below.
	unstored := bleve.NewTextFieldMapping()
	unstored.Store = false
	unstored.IncludeInAll = false
	for _, field := range []string{"Description", "ISBN"} {
		mapping.AddFieldMappingsAt(field, unstored)
	}

	disabled := []string{"Sources", "Date", "Publisher", "Format", "Keywords", "Language"}
	for _, name := range disabled {
		mapping.AddSubDocumentMapping(name, bleve.NewDocumentDisabledMapping())
	}

	return mapping
}
// defaultIndexMapping returns the index mapping used when a new index
// is created (see setupIndex): the metadata document mapping is
// registered under the "ebook" type and "en" is the default analyzer.
func defaultIndexMapping() *bleve.IndexMapping {
i := bleve.NewIndexMapping()
i.AddDocumentMapping("ebook", metadataDocumentMapping())
i.DefaultAnalyzer = "en"
return i
}
// Book is the record stored (JSON-encoded) in BookBucket for each
// book, keyed by its Id.
type Book struct {
// Id is the identifier of the book; its Key() is the database key and
// its String() form is the identifier used in the search index.
Id BookId
// Path of the book file. NOTE(review): presumably the location of the
// ebook on disk - confirm against the code that fills it in.
Path string
// CoverPath is presumably the location of the cover image - confirm.
CoverPath string
// FileType is presumably the ebook format - confirm.
FileType string
// Metadata is the part of the book that PutBook hands to the search index.
Metadata *Metadata
}
// Type returns the constant "ebook", the same name under which
// defaultIndexMapping registers the metadata document mapping.
func (b *Book) Type() string {
return "ebook"
}
// File is the record stored in FileBucket for a file, keyed by its
// Path. It remembers the modification time and size seen for the
// file, so that later changes can be detected with HasChanged.
type File struct {
	// Path of the file; also the key of the record in FileBucket.
	Path string
	// Mtime and Size are compared against a fresh os.FileInfo by HasChanged.
	Mtime time.Time
	Size  int64
	// Error: presumably set when the file could not be processed - confirm.
	Error bool
	// Id: presumably the identifier of the book this file belongs to - confirm.
	Id BookId
}

// HasChanged reports whether info differs from the recorded state of
// the file, that is whether the modification time or the size changed.
func (f *File) HasChanged(info os.FileInfo) bool {
	unchanged := info.ModTime().Equal(f.Mtime) && info.Size() == f.Size
	return !unchanged
}
// Database combines a LevelDB key/value store, which holds the Book
// and File records, with a bleve full-text index of the book metadata.
type Database struct {
// LevelDB handle, opened by setupLevelDb.
leveldb *levigo.DB
// Cache and filter policy attached to the LevelDB options; they are
// kept here so that Close can release them.
leveldbCache *levigo.Cache
leveldbFilter *levigo.FilterPolicy
// Search index, opened or created by setupIndex.
index bleve.Index
}
// NewDb opens the database stored in the directory path, creating the
// directory (with mode 0700, including any missing parents) and the
// database itself if they do not exist yet. The LevelDB store lives in
// the "db" subdirectory and the search index in "index".
//
// On success the caller is responsible for calling Close on the
// returned Database. On failure nothing is left open.
func NewDb(path string) (*Database, error) {
	// Make sure that path exists. MkdirAll is a no-op for an existing
	// directory and reports an error if path is not a directory.
	if err := os.MkdirAll(path, 0700); err != nil {
		return nil, err
	}

	// Initialize our database and the index.
	d := &Database{}
	if err := d.setupLevelDb(filepath.Join(path, "db")); err != nil {
		return nil, err
	}
	if err := d.setupIndex(filepath.Join(path, "index")); err != nil {
		// The LevelDB side is already open at this point: release it
		// instead of leaking it. Close cannot be used here because it
		// would also try to close the index, which is not set up.
		d.leveldb.Close()
		d.leveldbCache.Close()
		d.leveldbFilter.Close()
		return nil, err
	}
	return d, nil
}
// setupLevelDb opens the LevelDB database at path, creating it if it
// is missing, and stores the handle in db together with the cache and
// the filter policy it was opened with (Close releases them later).
//
// The database is opened with an LRU cache of 2<<28 bytes (512 MiB)
// and a Bloom filter using 10 bits per key. If opening fails, the
// error is returned and db is left untouched.
func (db *Database) setupLevelDb(path string) error {
	opts := levigo.NewOptions()
	// The options object is only needed for the Open call; the cache and
	// filter it points to are owned separately and outlive it.
	defer opts.Close()

	cache := levigo.NewLRUCache(2 << 28)
	opts.SetCache(cache)
	filter := levigo.NewBloomFilter(10)
	opts.SetFilterPolicy(filter)
	opts.SetCreateIfMissing(true)

	leveldb, err := levigo.Open(path, opts)
	if err != nil {
		// Nothing will ever use the cache and filter: release them.
		filter.Close()
		cache.Close()
		return err
	}

	db.leveldb = leveldb
	db.leveldbCache = cache
	db.leveldbFilter = filter
	return nil
}
// setupIndex opens the bleve index at path, or creates a new one with
// the default index mapping when nothing can be stat'ed at path. The
// result is stored in db.index.
func (db *Database) setupIndex(path string) error {
	var (
		idx bleve.Index
		err error
	)
	if _, statErr := os.Stat(path); statErr != nil {
		idx, err = bleve.New(path, defaultIndexMapping())
	} else {
		idx, err = bleve.Open(path)
	}
	db.index = idx
	return err
}
// Close releases every resource held by the database: the search
// index first, then the LevelDB handle, and finally the cache and the
// filter policy that the LevelDB handle was opened with.
func (db *Database) Close() {
db.index.Close()
db.leveldb.Close()
db.leveldbCache.Close()
db.leveldbFilter.Close()
}
// GetBook loads the book with the given identifier from BookBucket.
// It returns a nil book together with the error if the lookup fails.
func (db *Database) GetBook(bookid BookId) (*Book, error) {
	book := new(Book)
	err := db.Get(BookBucket, bookid.Key(), book)
	if err != nil {
		return nil, err
	}
	return book, nil
}
// GetFile loads the file record stored under path in FileBucket. It
// returns a nil record together with the error if the lookup fails.
func (db *Database) GetFile(path string) (*File, error) {
	file := new(File)
	err := db.Get(FileBucket, []byte(path), file)
	if err != nil {
		return nil, err
	}
	return file, nil
}
// Get reads the value stored under key in bucket and decodes it, as
// JSON, into obj (which must be a pointer, as for json.Unmarshal).
//
// It returns an error if the read fails, if the key does not exist, or
// if the stored value cannot be decoded into obj.
func (db *Database) Get(bucket, key []byte, obj interface{}) error {
	ro := levigo.NewReadOptions()
	defer ro.Close()

	data, err := db.leveldb.Get(ro, bktToKey(bucket, key))
	if err != nil {
		return err
	}
	// levigo reports a missing key as nil data with a nil error. Say so
	// explicitly instead of letting json.Unmarshal fail with a
	// misleading "unexpected end of JSON input".
	if data == nil {
		return errors.New("key not found")
	}
	return json.Unmarshal(data, obj)
}
// PutBook stores b in BookBucket under its identifier and then indexes
// its metadata in the search index under the same identifier. The
// index is not touched if storing the record fails.
func (db *Database) PutBook(b *Book) error {
	err := db.Put(BookBucket, b.Id.Key(), b)
	if err == nil {
		err = db.index.Index(b.Id.String(), b.Metadata)
	}
	return err
}
// PutFile stores f in FileBucket, using its Path as the key.
func (db *Database) PutFile(f *File) error {
return db.Put(FileBucket, []byte(f.Path), f)
}
// Put encodes obj as JSON and writes it under key in bucket. It
// returns the encoding error, if any, or the result of the write.
func (db *Database) Put(bucket, key []byte, obj interface{}) error {
	encoded, err := json.Marshal(obj)
	if err != nil {
		return err
	}

	wo := levigo.NewWriteOptions()
	defer wo.Close()

	fullKey := bktToKey(bucket, key)
	return db.leveldb.Put(wo, fullKey, encoded)
}
// DeleteBook removes the book with the given identifier from
// BookBucket and then from the search index. If removing the record
// fails, the error is returned and the index is left untouched, which
// mirrors the way PutBook handles a failed write.
func (db *Database) DeleteBook(bookid BookId) error {
	if err := db.Delete(BookBucket, bookid.Key()); err != nil {
		return err
	}
	return db.index.Delete(bookid.String())
}
// Delete removes the value stored under key in bucket.
func (db *Database) Delete(bucket, key []byte) error {
	fullKey := bktToKey(bucket, key)

	wo := levigo.NewWriteOptions()
	defer wo.Close()
	return db.leveldb.Delete(wo, fullKey)
}
// DatabaseIterator walks the keys of one bucket, as set up by
// Database.Scan. It must be closed with Close once done.
type DatabaseIterator struct {
// Database the snapshot was taken from; needed to release it.
db *levigo.DB
// Snapshot the iterator reads from.
snap *levigo.Snapshot
// Underlying LevelDB iterator.
iter *levigo.Iterator
// Read options the iterator was created with.
ro *levigo.ReadOptions
// Exclusive upper bound: iteration stops at the first key not below it.
end []byte
}
// Close releases the underlying iterator, its read options and the
// snapshot, in that order.
func (i *DatabaseIterator) Close() {
i.iter.Close()
i.ro.Close()
i.db.ReleaseSnapshot(i.snap)
}
// Next advances the iterator to the next key.
func (i *DatabaseIterator) Next() {
i.iter.Next()
}
// Valid reports whether the iterator points at an entry of the bucket:
// the underlying iterator must be valid and the current key must sort
// strictly before the end bound.
func (i *DatabaseIterator) Valid() bool {
return i.iter.Valid() && (bytes.Compare(i.iter.Key(), i.end) < 0)
}
// Id returns the BookId decoded from the current key by keyToId.
func (i *DatabaseIterator) Id() BookId {
return keyToId(i.iter.Key())
}
// Value decodes the current value, as JSON, into obj.
func (i *DatabaseIterator) Value(obj interface{}) error {
return json.Unmarshal(i.iter.Value(), obj)
}
// Scan returns an iterator over an entire bucket. The iterator reads
// from a snapshot taken when Scan is called and is positioned at the
// start of the key range that keyRange returns for the bucket. The
// caller must Close the iterator to release the snapshot, the read
// options and the underlying LevelDB iterator.
func (db *Database) Scan(bucket []byte) *DatabaseIterator {
snap := db.leveldb.NewSnapshot()
ro := levigo.NewReadOptions()
// Reads made through this iterator do not fill the cache.
ro.SetFillCache(false)
ro.SetSnapshot(snap)
it := db.leveldb.NewIterator(ro)
start, end := keyRange(bucket)
it.Seek(start)
return &DatabaseIterator{
db: db.leveldb,
snap: snap,
ro: ro,
iter: it,
end: end,
}
}
// SearchResult holds the outcome of a search.
type SearchResult struct {
// Results are the books of the requested window of hits that could be
// loaded from the database.
Results []*Book
// NumResults is the total reported by the index for the query, so it
// can be larger than len(Results).
NumResults int
}
// doSearch runs query on the search index, asking for at most limit
// hits starting at offset, and loads the matching books from the
// database. Hits whose book cannot be loaded are silently left out of
// the results, while NumResults always carries the total reported by
// the index.
func (db *Database) doSearch(query bleve.Query, offset, limit int) (*SearchResult, error) {
	// The request constructor takes the page size before the offset.
	request := bleve.NewSearchRequestOptions(query, limit, offset, false)
	response, err := db.index.Search(request)
	if err != nil {
		return nil, err
	}

	out := &SearchResult{NumResults: int(response.Total)}
	for _, hit := range response.Hits {
		book, err := db.GetBook(ParseID(hit.ID))
		if err != nil {
			continue
		}
		out.Results = append(out.Results, book)
	}
	return out, nil
}
// Search the database with a query string. The string is turned into
// a bleve query string query; offset and limit select the window of
// hits that is returned.
func (db *Database) Search(queryStr string, offset, limit int) (*SearchResult, error) {
return db.doSearch(bleve.NewQueryStringQuery(queryStr), offset, limit)
}
// Autocomplete runs a fuzzy search for a term and returns at most the
// first 20 hits.
func (db *Database) Autocomplete(term string) (*SearchResult, error) {
return db.doSearch(bleve.NewFuzzyQuery(term), 0, 20)
}
// Find a book matching the given metadata, if possible.
//
// When m carries at least one ISBN, the index is searched for any of
// the ISBNs (term queries on the "ISBN" field, OR-ed together).
// Otherwise it is searched for the title and every creator (match
// queries on "Title" and "Creator", AND-ed together); in that case an
// error is returned if m has neither.
//
// The first hit whose stored metadata Equals m is returned. Hits that
// cannot be loaded from the database are skipped. An error is returned
// when the search fails or no hit matches.
func (db *Database) Find(m *Metadata) (*Book, error) {
	var (
		clauses []bleve.Query
		query   bleve.Query
	)

	switch {
	case len(m.ISBN) > 0:
		for _, isbn := range m.ISBN {
			tq := bleve.NewTermQuery(isbn)
			tq.SetField("ISBN")
			clauses = append(clauses, tq)
		}
		query = bleve.NewDisjunctionQuery(clauses)

	default:
		if m.Title != "" {
			mq := bleve.NewMatchQuery(m.Title)
			mq.SetField("Title")
			clauses = append(clauses, mq)
		}
		// Ranging over an empty list is a no-op, no length check needed.
		for _, author := range m.Creator {
			mq := bleve.NewMatchQuery(author)
			mq.SetField("Creator")
			clauses = append(clauses, mq)
		}
		if len(clauses) == 0 {
			return nil, errors.New("insufficient metadata for query")
		}
		query = bleve.NewConjunctionQuery(clauses)
	}

	response, err := db.index.Search(bleve.NewSearchRequest(query))
	if err != nil {
		return nil, err
	}

	for _, hit := range response.Hits {
		candidate, err := db.GetBook(ParseID(hit.ID))
		if err == nil && candidate.Metadata.Equals(m) {
			return candidate, nil
		}
	}
	return nil, errors.New("no matches found")
}
func bktToKey(bucket, key []byte) []byte {
return bytes.Join([][]byte{bucket, key}, []byte{keySeparator})
}
// Input is a full key (including bucket).
func keyToId(key []byte) BookId {
n := bytes.Index(key, []byte{keySeparator})
if n < 0 {
return 0
}
var id uint64
binary.Read(bytes.NewReader(key[n+1:]), binary.LittleEndian, &id)