// database.go - storage layer for liber: LevelDB holds the canonical
// book/file records, bleve provides the full-text search index.
package liber

import (
	"bytes"
	cryptorand "crypto/rand"
	"encoding/binary"
	"encoding/gob"
	"errors"
	"log"
	"math/rand"
	"os"
	"path/filepath"
	"strconv"
	"time"

	"github.com/blevesearch/bleve"
	"github.com/jmhodges/levigo"
)

var (
	// LevelDB key-space buckets (key layout is "<bucket>/<key>").
	BookBucket     = []byte("ebook")      // BookId -> Book
	FileBucket     = []byte("file")       // path -> File
	BookFileBucket = []byte("ebook_file") // bookid/path -> path

	// keySeparator joins bucket and key components.
	keySeparator = byte('/')
)

// BookId is the unique random identifier of a book.
type BookId uint64

// String renders the ID in decimal; this form is used as the bleve
// document ID.
func (id BookId) String() string {
	return strconv.FormatUint(uint64(id), 10)
}

// Key returns the 8-byte little-endian encoding of the ID, used as
// the LevelDB key component.
func (id BookId) Key() []byte {
	// PutUint64 avoids the reflection of binary.Write into a
	// bytes.Buffer (whose error was silently ignored before); the
	// output bytes are identical.
	key := make([]byte, 8)
	binary.LittleEndian.PutUint64(key, uint64(id))
	return key
}

ale's avatar
ale committed
// Book is a stored e-book record: its random ID, the path of a cover
// image (if any), and the parsed metadata.
type Book struct {
	Id        BookId
	CoverPath string
	Metadata  *Metadata
}

// File is a scanned file on disk, keyed by Path. Error marks files
// that could not be parsed; such files keep no book association
// (see PutFile). Mtime/Size are used for change detection.
type File struct {
	Path     string
	FileType string
	Mtime    time.Time
	Size     int64
	Error    bool
	Id       BookId
}

// HasChanged reports whether the file on disk differs from the stored
// record, comparing modification time and size.
func (f *File) HasChanged(info os.FileInfo) bool {
	sameTime := info.ModTime().Equal(f.Mtime)
	sameSize := info.Size() == f.Size
	return !(sameTime && sameSize)
}

ale's avatar
ale committed
// init seeds math/rand from crypto/rand so that NewID produces
// unpredictable book identifiers across process restarts.
func init() {
	var seed int64
	if err := binary.Read(cryptorand.Reader, binary.LittleEndian, &seed); err != nil {
		// crypto/rand should never fail; fall back to the wall clock
		// rather than leaving the RNG with its default seed.
		seed = time.Now().UnixNano()
	}
	rand.Seed(seed)
}

ale's avatar
ale committed
// NewID generates a new random book identifier (63-bit, from the
// seeded math/rand source).
func NewID() BookId {
	return BookId(rand.Int63())
}

// ParseID converts a decimal string (as produced by BookId.String)
// back into a BookId. The parse error is deliberately ignored:
// malformed input yields whatever ParseUint returned (0, or MaxUint64
// on range overflow).
func ParseID(s string) BookId {
	id, _ := strconv.ParseUint(s, 10, 64)
	return BookId(id)
}

ale's avatar
ale committed
// flatBook is the flattened representation of a book that actually
// gets indexed by bleve. The underscore-prefixed fields are internal
// search helpers (exact-match IDs and autocomplete terms), not
// user-visible metadata.
type flatBook struct {
	Title       string   `json:"title"`
	Author      []string `json:"author"`
	Description string   `json:"description"`
	ISBN        []string `json:"isbn"`
	Unique      []string `json:"_unique"`
	Suggest     []string `json:"_suggest"`
}

// Type returns the bleve document type for this record.
func (f *flatBook) Type() string {
	return "ebook"
}

func flatten(book *Book) *flatBook {
	suggest := []string{book.Metadata.Title}
	if len(book.Metadata.Creator) > 0 {
		suggest = append(suggest, book.Metadata.Creator...)
	}
ale's avatar
ale committed
	return &flatBook{
		Title:       book.Metadata.Title,
		Author:      book.Metadata.Creator,
		Description: book.Metadata.Description,
		ISBN:        book.Metadata.ISBN,
		Unique:      book.Metadata.Uniques(),
		Suggest:     suggest,
ale's avatar
ale committed
	}
}

// defaultTextAnalyzer is the bleve analyzer applied to free-text fields.
var defaultTextAnalyzer = "standard"

ale's avatar
ale committed
func metadataDocumentMapping() *bleve.DocumentMapping {
	md := bleve.NewDocumentMapping()

ale's avatar
ale committed
	textFieldMapping := bleve.NewTextFieldMapping()
	textFieldMapping.Store = false
	textFieldMapping.Analyzer = defaultTextAnalyzer
ale's avatar
ale committed

	authorFieldMapping := bleve.NewTextFieldMapping()
	authorFieldMapping.Store = false
ale's avatar
ale committed
	authorFieldMapping.Analyzer = "simple"
ale's avatar
ale committed

ale's avatar
ale committed
	keywordFieldMapping := bleve.NewTextFieldMapping()
	keywordFieldMapping.Store = false
	keywordFieldMapping.Analyzer = "keyword"
	keywordFieldMapping.IncludeInAll = false
ale's avatar
ale committed

	suggestFieldMapping := bleve.NewTextFieldMapping()
	suggestFieldMapping.Store = false
	suggestFieldMapping.Analyzer = "edgeNgram"
	suggestFieldMapping.IncludeTermVectors = false
	suggestFieldMapping.IncludeInAll = false

ale's avatar
ale committed
	md.AddFieldMappingsAt("title", textFieldMapping)
	md.AddFieldMappingsAt("author", authorFieldMapping)
	md.AddFieldMappingsAt("description", textFieldMapping)
	md.AddFieldMappingsAt("isbn", keywordFieldMapping)
	md.AddFieldMappingsAt("_unique", keywordFieldMapping)
	md.AddFieldMappingsAt("_suggest", suggestFieldMapping)
ale's avatar
ale committed

	return md
}

func defaultIndexMapping() *bleve.IndexMapping {
	i := bleve.NewIndexMapping()

	err := i.AddCustomTokenFilter("edgeNgram325",
		map[string]interface{}{
			"type": "edge_ngram",
			"min":  3.0,
			"max":  25.0,
		})
	if err != nil {
		log.Fatal(err)
	}

	err = i.AddCustomAnalyzer("edgeNgram",
		map[string]interface{}{
			"type":      "custom",
			"tokenizer": "unicode",
			"token_filters": []interface{}{
				"to_lower",
				"stop_en",
				"edgeNgram325",
			},
		})
	if err != nil {
		log.Fatal(err)
	}

ale's avatar
ale committed
	i.AddDocumentMapping("ebook", metadataDocumentMapping())
ale's avatar
ale committed
	i.DefaultAnalyzer = defaultTextAnalyzer
	i.DefaultType = "ebook"
ale's avatar
ale committed
	return i
}

// Database couples the LevelDB store (canonical book/file records)
// with a bleve full-text index. The cache and filter-policy handles
// are retained so Close can release them after the DB handle.
type Database struct {
	leveldb       *levigo.DB
	leveldbCache  *levigo.Cache
	leveldbFilter *levigo.FilterPolicy

	index bleve.Index
}

// NewDb opens (creating if necessary) the database rooted at path,
// which holds the LevelDB store under "db" and the bleve index under
// "index".
func NewDb(path string) (*Database, error) {
	// MkdirAll is a no-op when the directory already exists, creates
	// missing parents, and avoids the stat-then-mkdir race of the
	// previous implementation.
	if err := os.MkdirAll(path, 0700); err != nil {
		return nil, err
	}

	// Initialize our database and the index.
	d := &Database{}
	if err := d.setupLevelDb(filepath.Join(path, "db")); err != nil {
		return nil, err
	}
	if err := d.setupIndex(filepath.Join(path, "index")); err != nil {
		return nil, err
	}
	return d, nil
}

func (db *Database) setupLevelDb(path string) error {
ale's avatar
ale committed
	// Use 256MB of cache and a small Bloom filter.
ale's avatar
ale committed
	opts := levigo.NewOptions()
	db.leveldbCache = levigo.NewLRUCache(2 << 28)
	opts.SetCache(db.leveldbCache)
	db.leveldbFilter = levigo.NewBloomFilter(10)
	opts.SetFilterPolicy(db.leveldbFilter)
	opts.SetCreateIfMissing(true)
	leveldb, err := levigo.Open(path, opts)
	if err != nil {
		return err
	}
	db.leveldb = leveldb
	return nil
}

// setupIndex opens the bleve index at path, creating a fresh one with
// the default mapping when nothing exists there yet.
func (db *Database) setupIndex(path string) error {
	var err error
	_, statErr := os.Stat(path)
	if statErr != nil {
		// Path absent: create a new index.
		db.index, err = bleve.New(path, defaultIndexMapping())
	} else {
		db.index, err = bleve.Open(path)
	}
	return err
}

// Close shuts down the index and the LevelDB store, then releases the
// cache and filter-policy handles (closed last, after the DB that
// uses them).
func (db *Database) Close() {
	db.index.Close()
	db.leveldb.Close()
	db.leveldbCache.Close()
	db.leveldbFilter.Close()
}

// GetBook loads the Book record stored under the given ID.
func (db *Database) GetBook(bookid BookId) (*Book, error) {
	book := new(Book)
	err := db.Get(BookBucket, bookid.Key(), book)
	if err != nil {
		return nil, err
	}
	return book, nil
}

// GetFile loads the File record stored under the given path.
func (db *Database) GetFile(path string) (*File, error) {
	file := new(File)
	err := db.Get(FileBucket, []byte(path), file)
	if err != nil {
		return nil, err
	}
	return file, nil
}

ale's avatar
ale committed
func (db *Database) GetBookFiles(bookid BookId) ([]*File, error) {
	ro := levigo.NewReadOptions()
	defer ro.Close()
	it := db.leveldb.NewIterator(ro)
	defer it.Close()
	start, end := keyRange(bktToKey(BookFileBucket, bookid.Key()))
	var out []*File
	for it.Seek(start); it.Valid() && bytes.Compare(it.Key(), end) < 0; it.Next() {
		var filepath string
		if gob.NewDecoder(bytes.NewReader(it.Value())).Decode(&filepath) == nil {
ale's avatar
ale committed
			if file, err := db.GetFile(filepath); err == nil {
				out = append(out, file)
			}
		}
	}
	return out, nil
}

ale's avatar
ale committed
func (db *Database) Get(bucket, key []byte, obj interface{}) error {
	ro := levigo.NewReadOptions()
	defer ro.Close()
	data, err := db.leveldb.Get(ro, bktToKey(bucket, key))
	if err != nil {
		return err
	}
	return gob.NewDecoder(bytes.NewReader(data)).Decode(obj)
ale's avatar
ale committed
}

func (db *Database) PutBook(b *Book) error {
	if err := db.Put(BookBucket, b.Id.Key(), b); err != nil {
		return err
	}
ale's avatar
ale committed
	return db.index.Index(b.Id.String(), flatten(b))
ale's avatar
ale committed
}

ale's avatar
ale committed
// fileBookKey builds the BookFileBucket sub-key "<bookid>/<path>".
func fileBookKey(path string, bookid BookId) []byte {
	k := append(bookid.Key(), keySeparator)
	return append(k, path...)
}

ale's avatar
ale committed
func (db *Database) PutFile(f *File) error {
ale's avatar
ale committed
	if err := db.Put(FileBucket, []byte(f.Path), f); err != nil {
		return err
	}
	if !f.Error {
		return db.Put(BookFileBucket, fileBookKey(f.Path, f.Id), f.Path)
	}
	return nil
ale's avatar
ale committed
}

func (db *Database) Put(bucket, key []byte, obj interface{}) error {
	var buf bytes.Buffer
	if err := gob.NewEncoder(&buf).Encode(obj); err != nil {
ale's avatar
ale committed
		return err
	}

	wo := levigo.NewWriteOptions()
	defer wo.Close()

	return db.leveldb.Put(wo, bktToKey(bucket, key), buf.Bytes())
ale's avatar
ale committed
}

// DeleteBook removes a book record and its search-index document.
func (db *Database) DeleteBook(bookid BookId) error {
	// Propagate the store-delete error (it was silently dropped
	// before), then remove the index document.
	if err := db.Delete(BookBucket, bookid.Key()); err != nil {
		return err
	}
	return db.index.Delete(bookid.String())
}

ale's avatar
ale committed
// DeleteFile removes a file record, its book association and — when
// it was the book's last remaining file — the book itself.
// Best-effort semantics: an unknown path is not an error, and the
// individual delete errors are deliberately ignored.
func (db *Database) DeleteFile(path string) error {
	f, err := db.GetFile(path)
	if err != nil {
		return nil
	}

	db.Delete(FileBucket, []byte(path))
	db.Delete(BookFileBucket, fileBookKey(path, f.Id))

	// Delete the book if there are no files left.
	if files, err := db.GetBookFiles(f.Id); err == nil && len(files) == 0 {
		db.DeleteBook(f.Id)
	}

	return nil
}

ale's avatar
ale committed
// Delete removes the entry stored under (bucket, key).
func (db *Database) Delete(bucket, key []byte) error {
	wo := levigo.NewWriteOptions()
	defer wo.Close()
	return db.leveldb.Delete(wo, bktToKey(bucket, key))
}

// DatabaseIterator walks one bucket of the LevelDB store against a
// consistent snapshot. Created by Database.Scan; the caller must call
// Close to release the snapshot and read options.
type DatabaseIterator struct {
	db   *levigo.DB
	snap *levigo.Snapshot
	iter *levigo.Iterator
	ro   *levigo.ReadOptions
	end  []byte
}

// Close releases the iterator, its read options, and finally the
// snapshot they were reading from.
func (i *DatabaseIterator) Close() {
	i.iter.Close()
	i.ro.Close()
	i.db.ReleaseSnapshot(i.snap)
}

// Next advances the iterator by one entry.
func (i *DatabaseIterator) Next() {
	i.iter.Next()
}

// Valid reports whether the iterator still points at an entry inside
// the scanned bucket range.
func (i *DatabaseIterator) Valid() bool {
	if !i.iter.Valid() {
		return false
	}
	return bytes.Compare(i.iter.Key(), i.end) < 0
}

// Id extracts the BookId encoded in the current entry's key.
func (i *DatabaseIterator) Id() BookId {
	return keyToId(i.iter.Key())
}

func (i *DatabaseIterator) Value(obj interface{}) error {
	return gob.NewDecoder(bytes.NewReader(i.iter.Value())).Decode(obj)
ale's avatar
ale committed
}

// Scan returns an iterator over every entry of a bucket. Iteration
// runs against a LevelDB snapshot, so concurrent writes do not affect
// it; the caller must Close() the returned iterator to release the
// snapshot.
func (db *Database) Scan(bucket []byte) *DatabaseIterator {
	snap := db.leveldb.NewSnapshot()
	ro := levigo.NewReadOptions()
	// Bulk scans should not churn the block cache.
	ro.SetFillCache(false)
	ro.SetSnapshot(snap)
	it := db.leveldb.NewIterator(ro)
	start, end := keyRange(bucket)
	it.Seek(start)
	return &DatabaseIterator{
		db:   db.leveldb,
		snap: snap,
		ro:   ro,
		iter: it,
		end:  end,
	}
}

// SearchResult is one page of search hits. NumResults is the total
// number of matches in the index, which may exceed len(Results).
type SearchResult struct {
	Results    []*Book
	NumResults int
}

// doSearch executes a bleve query and resolves every hit back to its
// full Book record, silently dropping hits whose database entry has
// disappeared.
func (db *Database) doSearch(query bleve.Query, offset, limit int) (*SearchResult, error) {
	req := bleve.NewSearchRequestOptions(query, limit, offset, false)
	res, err := db.index.Search(req)
	if err != nil {
		return nil, err
	}

	out := &SearchResult{NumResults: int(res.Total)}
	for _, hit := range res.Hits {
		book, gerr := db.GetBook(ParseID(hit.ID))
		if gerr != nil {
			continue
		}
		out.Results = append(out.Results, book)
	}
	return out, nil
}

// Search runs a bleve query-string query against the index and
// returns the requested page of matching books.
func (db *Database) Search(queryStr string, offset, limit int) (*SearchResult, error) {
	return db.doSearch(bleve.NewQueryStringQuery(queryStr), offset, limit)
}

// Autocomplete runs a fuzzy search for a term.
func (db *Database) Suggest(term string) (*SearchResult, error) {
	query := bleve.NewTermQuery(term).SetField("_suggest")
	return db.doSearch(query, 0, 20)
ale's avatar
ale committed
}

// Find a book matching the given metadata, if possible.
ale's avatar
ale committed
func (db *Database) Find(uniqueIds []string) (*Book, error) {
	var queries []bleve.Query
ale's avatar
ale committed
	var query bleve.Query

ale's avatar
ale committed
	for _, u := range uniqueIds {
		queries = append(queries, bleve.NewTermQuery(u).SetField("_unique"))
	}
	if len(queries) > 0 {
ale's avatar
ale committed
		query = bleve.NewDisjunctionQuery(queries)
	} else {
ale's avatar
ale committed
		query = queries[0]
ale's avatar
ale committed
	}

	search := bleve.NewSearchRequest(query)
	result, err := db.index.Search(search)
	if err != nil {
		return nil, err
	}
ale's avatar
ale committed
	if len(result.Hits) == 0 {
		return nil, errors.New("no matches found")
ale's avatar
ale committed
	}
ale's avatar
ale committed

	return db.GetBook(ParseID(result.Hits[0].ID))
ale's avatar
ale committed
}

func bktToKey(bucket, key []byte) []byte {
	return bytes.Join([][]byte{bucket, key}, []byte{keySeparator})
}

// Input is a full key (including bucket). keyToId extracts the BookId
// that follows the first key separator; it returns 0 when the key
// contains no separator.
func keyToId(key []byte) BookId {
	n := bytes.Index(key, []byte{keySeparator})
	if n < 0 {
		return 0
	}

	var id uint64
	// Error deliberately ignored: a short or unreadable suffix leaves
	// id at its zero value.
	binary.Read(bytes.NewReader(key[n+1:]), binary.LittleEndian, &id)
	return BookId(id)
}

// keyRange computes the half-open key interval [prefix+sep,
// prefix+(sep+1)) that covers every key belonging to the bucket
// identified by prefix.
func keyRange(prefix []byte) ([]byte, []byte) {
	start := append(append(make([]byte, 0, len(prefix)+1), prefix...), keySeparator)
	end := append(append(make([]byte, 0, len(prefix)+1), prefix...), keySeparator+1)
	return start, end
}