package liber

import (
	"bytes"
	"encoding/binary"
	"encoding/json"
	"errors"
	"math/rand"
	"os"
	"path/filepath"
	"strconv"
	"time"

	"github.com/blevesearch/bleve"
	"github.com/jmhodges/levigo"
)

var (
	// Bucket prefixes used to partition the LevelDB keyspace.
	BookBucket = []byte("ebook")
	FileBucket = []byte("file")

	// keySeparator joins a bucket prefix with the record key.
	keySeparator = byte('/')
)

// BookId is the unique identifier of a book record.
type BookId uint64

// String returns the decimal representation of the id; it is also used
// as the bleve document id.
func (id BookId) String() string {
	return strconv.FormatUint(uint64(id), 10)
}

// Key returns the 8-byte little-endian encoding of the id, used as the
// per-bucket LevelDB key.
func (id BookId) Key() []byte {
	// Encode directly instead of binary.Write, which ignored its error
	// and went through reflection plus a bytes.Buffer allocation. The
	// output bytes are identical.
	var buf [8]byte
	binary.LittleEndian.PutUint64(buf[:], uint64(id))
	return buf[:]
}

// NewID returns a new random BookId.
//
// NOTE(review): math/rand is never seeded in this file, so unless a
// rand.Seed call exists elsewhere the id sequence repeats on every
// process start — confirm before relying on cross-run uniqueness.
func NewID() BookId {
	return BookId(rand.Int63())
}

// ParseID parses the decimal string form of a BookId. Invalid input
// yields the zero id.
func ParseID(s string) BookId {
	id, _ := strconv.ParseUint(s, 10, 64)
	return BookId(id)
}

// metadataDocumentMapping builds the bleve document mapping for book
// metadata: Title and Creator are analyzed and searchable, Description
// and ISBN are indexed but excluded from the _all field, and the
// remaining fields are not indexed at all.
func metadataDocumentMapping() *bleve.DocumentMapping {
	md := bleve.NewDocumentMapping()

	titleFieldMapping := bleve.NewTextFieldMapping()
	titleFieldMapping.Analyzer = "en"
	titleFieldMapping.Store = false
	md.AddFieldMappingsAt("Title", titleFieldMapping)

	authorFieldMapping := bleve.NewTextFieldMapping()
	authorFieldMapping.Store = false
	md.AddFieldMappingsAt("Creator", authorFieldMapping)

	nostoreFieldMapping := bleve.NewTextFieldMapping()
	nostoreFieldMapping.Store = false
	nostoreFieldMapping.IncludeInAll = false
	md.AddFieldMappingsAt("Description", nostoreFieldMapping)
	md.AddFieldMappingsAt("ISBN", nostoreFieldMapping)

	for _, ignore := range []string{"Sources", "Date", "Publisher", "Format", "Keywords", "Language"} {
		md.AddSubDocumentMapping(ignore, bleve.NewDocumentDisabledMapping())
	}

	return md
}

// defaultIndexMapping returns the index mapping used when creating a
// brand-new search index.
func defaultIndexMapping() *bleve.IndexMapping {
	i := bleve.NewIndexMapping()
	i.AddDocumentMapping("ebook", metadataDocumentMapping())
	i.DefaultAnalyzer = "en"
	return i
}

// Book is a book record stored in the database.
type Book struct {
	Id        BookId
	Path      string
	CoverPath string
	FileType  string
	Metadata  *Metadata
}

// Type returns the bleve document type for books.
func (b *Book) Type() string {
	return "ebook"
}

// File records a file seen on disk and the book it maps to.
type File struct {
	Path  string
	Mtime time.Time
	Size  int64
	Error bool
	Id    BookId
}

// HasChanged reports whether the file on disk differs (by mtime or
// size) from the stored record.
func (f *File) HasChanged(info os.FileInfo) bool {
	return !info.ModTime().Equal(f.Mtime) || info.Size() != f.Size
}

// Database couples a LevelDB store (book and file records, as JSON)
// with a bleve full-text index over book metadata.
type Database struct {
	leveldb       *levigo.DB
	leveldbCache  *levigo.Cache
	leveldbFilter *levigo.FilterPolicy
	index         bleve.Index
}

// NewDb opens (creating if necessary) the database rooted at path: the
// LevelDB store lives in "db" and the search index in "index".
func NewDb(path string) (*Database, error) {
	// Make sure that path exists.
	if _, err := os.Stat(path); err != nil {
		if err := os.Mkdir(path, 0700); err != nil {
			return nil, err
		}
	}

	// Initialize our database and the index.
	d := &Database{}
	if err := d.setupLevelDb(filepath.Join(path, "db")); err != nil {
		return nil, err
	}
	if err := d.setupIndex(filepath.Join(path, "index")); err != nil {
		return nil, err
	}

	return d, nil
}

// setupLevelDb opens the LevelDB store at path, creating it if
// missing, with an LRU block cache and a 10-bit Bloom filter. The
// cache and filter handles are kept on the Database so Close can
// release them.
func (db *Database) setupLevelDb(path string) error {
	opts := levigo.NewOptions()

	db.leveldbCache = levigo.NewLRUCache(2 << 28)
	opts.SetCache(db.leveldbCache)

	db.leveldbFilter = levigo.NewBloomFilter(10)
	opts.SetFilterPolicy(db.leveldbFilter)

	opts.SetCreateIfMissing(true)

	leveldb, err := levigo.Open(path, opts)
	if err != nil {
		return err
	}
	db.leveldb = leveldb
	return nil
}

// setupIndex opens the bleve index at path, creating it with the
// default mapping when it does not exist yet.
func (db *Database) setupIndex(path string) error {
	var err error
	if _, serr := os.Stat(path); serr == nil {
		db.index, err = bleve.Open(path)
	} else {
		db.index, err = bleve.New(path, defaultIndexMapping())
	}
	return err
}

// Close releases the index, the store, and the LevelDB helper objects.
func (db *Database) Close() {
	db.index.Close()
	db.leveldb.Close()
	db.leveldbCache.Close()
	db.leveldbFilter.Close()
}

// GetBook retrieves a book record by id.
func (db *Database) GetBook(bookid BookId) (*Book, error) {
	var b Book
	if err := db.Get(BookBucket, bookid.Key(), &b); err != nil {
		return nil, err
	}
	return &b, nil
}

// GetFile retrieves a file record by path.
func (db *Database) GetFile(path string) (*File, error) {
	var f File
	if err := db.Get(FileBucket, []byte(path), &f); err != nil {
		return nil, err
	}
	return &f, nil
}

// Get reads the JSON-encoded record at bucket/key into obj.
func (db *Database) Get(bucket, key []byte, obj interface{}) error {
	ro := levigo.NewReadOptions()
	defer ro.Close()

	data, err := db.leveldb.Get(ro, bktToKey(bucket, key))
	if err != nil {
		return err
	}
	// levigo returns nil data and a nil error for a missing key; report
	// that explicitly instead of letting json.Unmarshal fail with a
	// confusing "unexpected end of JSON input".
	if data == nil {
		return errors.New("not found")
	}
	return json.Unmarshal(data, obj)
}

// PutBook writes a book record and indexes its metadata.
func (db *Database) PutBook(b *Book) error {
	if err := db.Put(BookBucket, b.Id.Key(), b); err != nil {
		return err
	}
	return db.index.Index(b.Id.String(), b.Metadata)
}

// PutFile writes a file record keyed by its path.
func (db *Database) PutFile(f *File) error {
	return db.Put(FileBucket, []byte(f.Path), f)
}

// Put JSON-encodes obj and writes it at bucket/key.
func (db *Database) Put(bucket, key []byte, obj interface{}) error {
	data, err := json.Marshal(obj)
	if err != nil {
		return err
	}

	wo := levigo.NewWriteOptions()
	defer wo.Close()
	return db.leveldb.Put(wo, bktToKey(bucket, key), data)
}

// DeleteBook removes a book record and its index entry.
func (db *Database) DeleteBook(bookid BookId) error {
	// Bug fix: the store-deletion error used to be silently dropped,
	// which could leave the record in place while its index entry was
	// removed.
	if err := db.Delete(BookBucket, bookid.Key()); err != nil {
		return err
	}
	return db.index.Delete(bookid.String())
}

// Delete removes the record at bucket/key.
func (db *Database) Delete(bucket, key []byte) error {
	wo := levigo.NewWriteOptions()
	defer wo.Close()
	return db.leveldb.Delete(wo, bktToKey(bucket, key))
}

// DatabaseIterator walks the records of a single bucket, reading from
// a consistent LevelDB snapshot.
type DatabaseIterator struct {
	db   *levigo.DB
	snap *levigo.Snapshot
	iter *levigo.Iterator
	ro   *levigo.ReadOptions
	end  []byte
}

// Close releases the iterator, its read options, and the snapshot.
func (i *DatabaseIterator) Close() {
	i.iter.Close()
	i.ro.Close()
	i.db.ReleaseSnapshot(i.snap)
}

// Next advances the iterator by one record.
func (i *DatabaseIterator) Next() {
	i.iter.Next()
}

// Valid reports whether the iterator still points inside the bucket.
func (i *DatabaseIterator) Valid() bool {
	return i.iter.Valid() && bytes.Compare(i.iter.Key(), i.end) < 0
}

// Id returns the BookId encoded in the current key.
func (i *DatabaseIterator) Id() BookId {
	return keyToId(i.iter.Key())
}

// Value decodes the current JSON value into obj.
func (i *DatabaseIterator) Value(obj interface{}) error {
	return json.Unmarshal(i.iter.Value(), obj)
}

// Scan an entire bucket.
func (db *Database) Scan(bucket []byte) *DatabaseIterator { snap := db.leveldb.NewSnapshot() ro := levigo.NewReadOptions() ro.SetFillCache(false) ro.SetSnapshot(snap) it := db.leveldb.NewIterator(ro) start, end := keyRange(bucket) it.Seek(start) return &DatabaseIterator{ db: db.leveldb, snap: snap, ro: ro, iter: it, end: end, } } type SearchResult struct { Results []*Book NumResults int } func (db *Database) doSearch(query bleve.Query, offset, limit int) (*SearchResult, error) { req := bleve.NewSearchRequestOptions(query, limit, offset, false) result, err := db.index.Search(req) if err != nil { return nil, err } sr := SearchResult{NumResults: int(result.Total)} for _, r := range result.Hits { if book, err := db.GetBook(ParseID(r.ID)); err == nil { sr.Results = append(sr.Results, book) } } return &sr, nil } // Search the database with a query string. func (db *Database) Search(queryStr string, offset, limit int) (*SearchResult, error) { return db.doSearch(bleve.NewQueryStringQuery(queryStr), offset, limit) } // Autocomplete runs a fuzzy search for a term. func (db *Database) Autocomplete(term string) (*SearchResult, error) { return db.doSearch(bleve.NewFuzzyQuery(term), 0, 20) } // Find a book matching the given metadata, if possible. 
func (db *Database) Find(m *Metadata) (*Book, error) { var query bleve.Query if len(m.ISBN) > 0 { var queries []bleve.Query for _, isbn := range m.ISBN { q := bleve.NewTermQuery(isbn) q.SetField("ISBN") queries = append(queries, q) } query = bleve.NewDisjunctionQuery(queries) } else { var queries []bleve.Query if m.Title != "" { q := bleve.NewMatchQuery(m.Title) q.SetField("Title") queries = append(queries, q) } if len(m.Creator) > 0 { for _, a := range m.Creator { q := bleve.NewMatchQuery(a) q.SetField("Creator") queries = append(queries, q) } } if len(queries) == 0 { return nil, errors.New("insufficient metadata for query") } query = bleve.NewConjunctionQuery(queries) } search := bleve.NewSearchRequest(query) result, err := db.index.Search(search) if err != nil { return nil, err } for _, r := range result.Hits { book, err := db.GetBook(ParseID(r.ID)) if err != nil { continue } if book.Metadata.Equals(m) { return book, nil } } return nil, errors.New("no matches found") } func bktToKey(bucket, key []byte) []byte { return bytes.Join([][]byte{bucket, key}, []byte{keySeparator}) } // Input is a full key (including bucket). func keyToId(key []byte) BookId { n := bytes.Index(key, []byte{keySeparator}) if n < 0 { return 0 } var id uint64 binary.Read(bytes.NewReader(key[n+1:]), binary.LittleEndian, &id) return BookId(id) } func keyRange(prefix []byte) ([]byte, []byte) { start := make([]byte, len(prefix)+1) end := make([]byte, len(prefix)+1) copy(start, prefix) copy(end, prefix) start[len(prefix)] = keySeparator end[len(prefix)] = keySeparator + 1 return start, end }