package liber

import (
	"bytes"
	cryptorand "crypto/rand"
	"encoding/binary"
	"encoding/gob"
	"errors"
	"log"
	"math/rand"
	"os"
	"path/filepath"
	"strconv"
	"time"

	"github.com/blevesearch/bleve"
	"github.com/jmhodges/levigo"
)

var (
	BookBucket     = []byte("ebook")
	FileBucket     = []byte("file")
	BookFileBucket = []byte("ebook_file")

	keySeparator = byte('/')
)

type BookId uint64

func (id BookId) String() string {
	return strconv.FormatUint(uint64(id), 10)
}

// Key returns the 8-byte little-endian database encoding of the ID.
func (id BookId) Key() []byte {
	var buf bytes.Buffer
	binary.Write(&buf, binary.LittleEndian, id)
	return buf.Bytes()
}

type Book struct {
	Id        BookId
	CoverPath string
	Metadata  *Metadata
}

type File struct {
	Path     string
	FileType string
	Mtime    time.Time
	Size     int64
	Error    bool
	Id       BookId
}

// HasChanged reports whether the file on disk differs from the stored
// record, judging by modification time and size.
func (f *File) HasChanged(info os.FileInfo) bool {
	return !info.ModTime().Equal(f.Mtime) || info.Size() != f.Size
}

func init() {
	// Seed the RNG to a random value.
	var seed int64
	binary.Read(cryptorand.Reader, binary.LittleEndian, &seed)
	rand.Seed(seed)
}

// NewID returns a new random BookId.
func NewID() BookId {
	return BookId(rand.Int63())
}

// ParseID parses the string representation of a BookId (a parse
// failure yields the zero ID).
func ParseID(s string) BookId {
	id, _ := strconv.ParseUint(s, 10, 64)
	return BookId(id)
}

// The structure that actually gets indexed.
type flatBook struct {
	Title       string   `json:"title"`
	Author      []string `json:"author"`
	Description string   `json:"description"`
	ISBN        []string `json:"isbn"`
	Unique      []string `json:"_unique"`
	Suggest     []string `json:"_suggest"`
}

func (f *flatBook) Type() string {
	return "ebook"
}

func flatten(book *Book) *flatBook {
	suggest := []string{book.Metadata.Title}
	if len(book.Metadata.Creator) > 0 {
		suggest = append(suggest, book.Metadata.Creator...)
	}
	return &flatBook{
		Title:       book.Metadata.Title,
		Author:      book.Metadata.Creator,
		Description: book.Metadata.Description,
		ISBN:        book.Metadata.ISBN,
		Unique:      book.Metadata.Uniques(),
		Suggest:     suggest,
	}
}

var defaultTextAnalyzer = "standard"

func metadataDocumentMapping() *bleve.DocumentMapping {
	md := bleve.NewDocumentMapping()

	textFieldMapping := bleve.NewTextFieldMapping()
	textFieldMapping.Store = false
	textFieldMapping.Analyzer = defaultTextAnalyzer

	authorFieldMapping := bleve.NewTextFieldMapping()
	authorFieldMapping.Store = false
	authorFieldMapping.Analyzer = "simple"

	keywordFieldMapping := bleve.NewTextFieldMapping()
	keywordFieldMapping.Store = false
	keywordFieldMapping.Analyzer = "keyword"
	keywordFieldMapping.IncludeInAll = false

	suggestFieldMapping := bleve.NewTextFieldMapping()
	suggestFieldMapping.Store = false
	suggestFieldMapping.Analyzer = "edgeNgram"
	suggestFieldMapping.IncludeTermVectors = false
	suggestFieldMapping.IncludeInAll = false

	md.AddFieldMappingsAt("title", textFieldMapping)
	md.AddFieldMappingsAt("author", authorFieldMapping)
	md.AddFieldMappingsAt("description", textFieldMapping)
	md.AddFieldMappingsAt("isbn", keywordFieldMapping)
	md.AddFieldMappingsAt("_unique", keywordFieldMapping)
	md.AddFieldMappingsAt("_suggest", suggestFieldMapping)

	return md
}

func defaultIndexMapping() *bleve.IndexMapping {
	i := bleve.NewIndexMapping()

	err := i.AddCustomTokenFilter("edgeNgram325", map[string]interface{}{
		"type": "edge_ngram",
		"min":  3.0,
		"max":  25.0,
	})
	if err != nil {
		log.Fatal(err)
	}

	err = i.AddCustomAnalyzer("edgeNgram", map[string]interface{}{
		"type":      "custom",
		"tokenizer": "unicode",
		"token_filters": []interface{}{
			"to_lower",
			"stop_en",
			"edgeNgram325",
		},
	})
	if err != nil {
		log.Fatal(err)
	}

	i.AddDocumentMapping("ebook", metadataDocumentMapping())
	i.DefaultAnalyzer = defaultTextAnalyzer
	i.DefaultType = "ebook"

	return i
}
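// flattenExample is an illustrative sketch, not part of the API: the
// metadata values are invented for demonstration, and it assumes
// Metadata.Uniques tolerates sparse metadata. It shows how a Book is
// reduced to the flatBook document that bleve indexes; the title and
// creators are copied into the _suggest field, which the edgeNgram
// analyzer above tokenizes into 3- to 25-character prefixes for
// autocompletion.
func flattenExample() *flatBook {
	book := &Book{
		Id: NewID(),
		Metadata: &Metadata{
			Title:   "An Example Title",
			Creator: []string{"Some Author"},
		},
	}
	return flatten(book)
}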
type Database struct {
	leveldb       *levigo.DB
	leveldbCache  *levigo.Cache
	leveldbFilter *levigo.FilterPolicy

	index bleve.Index

	path string
}

// NewDb opens (or creates) a Database rooted at the given directory,
// which holds both the LevelDB store and the bleve index.
func NewDb(path string) (*Database, error) {
	// Make sure that path exists.
	if _, err := os.Stat(path); err != nil {
		if err := os.Mkdir(path, 0700); err != nil {
			return nil, err
		}
	}

	// Initialize our database and the index.
	d := &Database{path: path}
	if err := d.setupLevelDb(filepath.Join(path, "db")); err != nil {
		return nil, err
	}
	if err := d.setupIndex(filepath.Join(path, "index")); err != nil {
		return nil, err
	}

	return d, nil
}

func (db *Database) setupLevelDb(path string) error {
	// Use a 512MB cache and a small Bloom filter.
	opts := levigo.NewOptions()
	db.leveldbCache = levigo.NewLRUCache(2 << 28)
	opts.SetCache(db.leveldbCache)
	db.leveldbFilter = levigo.NewBloomFilter(10)
	opts.SetFilterPolicy(db.leveldbFilter)
	opts.SetCreateIfMissing(true)

	leveldb, err := levigo.Open(path, opts)
	if err != nil {
		return err
	}
	db.leveldb = leveldb
	return nil
}

func (db *Database) setupIndex(path string) error {
	var err error
	if _, serr := os.Stat(path); serr == nil {
		db.index, err = bleve.Open(path)
	} else {
		db.index, err = bleve.New(path, defaultIndexMapping())
	}
	return err
}

// Close releases all resources associated with the database.
func (db *Database) Close() {
	db.index.Close()
	db.leveldb.Close()
	db.leveldbCache.Close()
	db.leveldbFilter.Close()
}

func (db *Database) GetBook(bookid BookId) (*Book, error) {
	var b Book
	if err := db.Get(BookBucket, bookid.Key(), &b); err != nil {
		return nil, err
	}
	return &b, nil
}

func (db *Database) GetFile(path string) (*File, error) {
	var f File
	if err := db.Get(FileBucket, []byte(path), &f); err != nil {
		return nil, err
	}
	return &f, nil
}

// GetBookFiles returns all the files associated with a book.
func (db *Database) GetBookFiles(bookid BookId) ([]*File, error) {
	ro := levigo.NewReadOptions()
	defer ro.Close()
	it := db.leveldb.NewIterator(ro)
	defer it.Close()

	start, end := keyRange(bktToKey(BookFileBucket, bookid.Key()))
	var out []*File
	for it.Seek(start); it.Valid() && bytes.Compare(it.Key(), end) < 0; it.Next() {
		var path string
		if gob.NewDecoder(bytes.NewReader(it.Value())).Decode(&path) == nil {
			if file, err := db.GetFile(path); err == nil {
				out = append(out, file)
			}
		}
	}
	return out, nil
}

func (db *Database) Get(bucket, key []byte, obj interface{}) error {
	ro := levigo.NewReadOptions()
	defer ro.Close()
	data, err := db.leveldb.Get(ro, bktToKey(bucket, key))
	if err != nil {
		return err
	}
	// A missing key yields empty data, which surfaces as a decoding error.
	return gob.NewDecoder(bytes.NewReader(data)).Decode(obj)
}

func (db *Database) PutBook(b *Book) error {
	if err := db.Put(BookBucket, b.Id.Key(), b); err != nil {
		return err
	}
	return db.index.Index(b.Id.String(), flatten(b))
}

func fileBookKey(path string, bookid BookId) []byte {
	return bytes.Join([][]byte{bookid.Key(), []byte(path)}, []byte{keySeparator})
}

func (db *Database) PutFile(f *File) error {
	if err := db.Put(FileBucket, []byte(f.Path), f); err != nil {
		return err
	}
	if !f.Error {
		return db.Put(BookFileBucket, fileBookKey(f.Path, f.Id), f.Path)
	}
	return nil
}

func (db *Database) Put(bucket, key []byte, obj interface{}) error {
	var buf bytes.Buffer
	if err := gob.NewEncoder(&buf).Encode(obj); err != nil {
		return err
	}
	wo := levigo.NewWriteOptions()
	defer wo.Close()
	return db.leveldb.Put(wo, bktToKey(bucket, key), buf.Bytes())
}

func (db *Database) DeleteBook(bookid BookId) error {
	if err := db.Delete(BookBucket, bookid.Key()); err != nil {
		return err
	}
	return db.index.Delete(bookid.String())
}

func (db *Database) DeleteFile(path string) error {
	f, err := db.GetFile(path)
	if err != nil {
		// Unknown file, nothing to delete.
		return nil
	}

	db.Delete(FileBucket, []byte(path))
	db.Delete(BookFileBucket, fileBookKey(path, f.Id))

	// Delete the book if there are no files left.
	if files, err := db.GetBookFiles(f.Id); err == nil && len(files) == 0 {
		db.DeleteBook(f.Id)
	}
	return nil
}
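// storeExample is an illustrative sketch of the intended write path
// (the path, size and metadata below are invented for demonstration):
// a File record is stored for the on-disk file, and a Book record for
// the logical book that owns it, with the two sharing the same BookId
// so that GetBookFiles can later find the association.
func storeExample(db *Database) error {
	id := NewID()
	if err := db.PutFile(&File{
		Path:     "/library/example.epub",
		FileType: "epub",
		Mtime:    time.Now(),
		Size:     12345,
		Id:       id,
	}); err != nil {
		return err
	}
	return db.PutBook(&Book{
		Id:       id,
		Metadata: &Metadata{Title: "An Example Title"},
	})
}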
func (db *Database) Delete(bucket, key []byte) error {
	wo := levigo.NewWriteOptions()
	defer wo.Close()
	return db.leveldb.Delete(wo, bktToKey(bucket, key))
}

type DatabaseIterator struct {
	db   *levigo.DB
	snap *levigo.Snapshot
	iter *levigo.Iterator
	ro   *levigo.ReadOptions
	end  []byte
}

func (i *DatabaseIterator) Close() {
	i.iter.Close()
	i.ro.Close()
	i.db.ReleaseSnapshot(i.snap)
}

func (i *DatabaseIterator) Next() {
	i.iter.Next()
}

func (i *DatabaseIterator) Valid() bool {
	return i.iter.Valid() && (bytes.Compare(i.iter.Key(), i.end) < 0)
}

func (i *DatabaseIterator) Id() BookId {
	return keyToId(i.iter.Key())
}

func (i *DatabaseIterator) Value(obj interface{}) error {
	return gob.NewDecoder(bytes.NewReader(i.iter.Value())).Decode(obj)
}

// Scan an entire bucket. The caller must Close the returned iterator.
func (db *Database) Scan(bucket []byte) *DatabaseIterator {
	snap := db.leveldb.NewSnapshot()
	ro := levigo.NewReadOptions()
	ro.SetFillCache(false)
	ro.SetSnapshot(snap)
	it := db.leveldb.NewIterator(ro)
	start, end := keyRange(bucket)
	it.Seek(start)
	return &DatabaseIterator{
		db:   db.leveldb,
		snap: snap,
		ro:   ro,
		iter: it,
		end:  end,
	}
}

// Reindex the entire database. This is an administrative operation,
// to be performed after an incompatible index schema change. It will
// delete the existing index and re-create it from scratch.
func (db *Database) Reindex() error {
	// Close the index, delete it, and re-open it.
	db.index.Close()
	indexPath := filepath.Join(db.path, "index")
	if err := os.RemoveAll(indexPath); err != nil {
		return err
	}
	if err := db.setupIndex(indexPath); err != nil {
		return err
	}

	// Scan the database and re-index everything.
	i := db.Scan(BookBucket)
	defer i.Close()
	for ; i.Valid(); i.Next() {
		var book Book
		if err := i.Value(&book); err != nil {
			continue
		}
		db.index.Index(i.Id().String(), flatten(&book))
	}
	return nil
}

type SearchResult struct {
	Results    []*Book
	NumResults int
}

func (db *Database) doSearch(query bleve.Query, offset, limit int) (*SearchResult, error) {
	req := bleve.NewSearchRequestOptions(query, limit, offset, false)
	result, err := db.index.Search(req)
	if err != nil {
		return nil, err
	}

	sr := SearchResult{NumResults: int(result.Total)}
	for _, r := range result.Hits {
		if book, err := db.GetBook(ParseID(r.ID)); err == nil {
			sr.Results = append(sr.Results, book)
		}
	}
	return &sr, nil
}

// Search the database with a query string.
func (db *Database) Search(queryStr string, offset, limit int) (*SearchResult, error) {
	return db.doSearch(bleve.NewQueryStringQuery(queryStr), offset, limit)
}

// Suggest runs an autocomplete (prefix) search for a term.
func (db *Database) Suggest(term string) (*SearchResult, error) {
	query := bleve.NewTermQuery(term).SetField("_suggest")
	return db.doSearch(query, 0, 20)
}

// Find a book matching the given metadata, if possible.
func (db *Database) Find(uniqueIds []string) (*Book, error) {
	var queries []bleve.Query
	for _, u := range uniqueIds {
		queries = append(queries, bleve.NewTermQuery(u).SetField("_unique"))
	}
	if len(queries) == 0 {
		return nil, errors.New("no unique IDs to search for")
	}

	var query bleve.Query
	if len(queries) > 1 {
		query = bleve.NewDisjunctionQuery(queries)
	} else {
		query = queries[0]
	}

	search := bleve.NewSearchRequest(query)
	result, err := db.index.Search(search)
	if err != nil {
		return nil, err
	}
	if len(result.Hits) == 0 {
		return nil, errors.New("no matches found")
	}
	return db.GetBook(ParseID(result.Hits[0].ID))
}
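// searchExample is an illustrative sketch of the query entry points
// (the query text is invented for demonstration): free-form query
// strings go through Search, while Suggest matches edge-ngram
// prefixes against the _suggest field for autocompletion.
func searchExample(db *Database) {
	if res, err := db.Search("author:tolstoy", 0, 10); err == nil {
		log.Printf("%d results", res.NumResults)
	}
	if res, err := db.Suggest("war"); err == nil {
		log.Printf("%d suggestions", res.NumResults)
	}
}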
func bktToKey(bucket, key []byte) []byte {
	return bytes.Join([][]byte{bucket, key}, []byte{keySeparator})
}

// keyToId extracts the BookId from a full key (including bucket).
func keyToId(key []byte) BookId {
	n := bytes.Index(key, []byte{keySeparator})
	if n < 0 {
		return 0
	}
	var id uint64
	binary.Read(bytes.NewReader(key[n+1:]), binary.LittleEndian, &id)
	return BookId(id)
}

func keyRange(prefix []byte) ([]byte, []byte) {
	start := make([]byte, len(prefix)+1)
	end := make([]byte, len(prefix)+1)
	copy(start, prefix)
	copy(end, prefix)
	start[len(prefix)] = keySeparator
	end[len(prefix)] = keySeparator + 1
	return start, end
}
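// keyRangeExample is an illustrative sketch of how Scan bounds its
// iteration: keyRange turns a bucket prefix into the half-open
// interval ["<bucket>/", "<bucket>0"), which covers every key in the
// bucket because '0' is the byte immediately after the '/' separator.
func keyRangeExample() {
	start, end := keyRange(BookBucket)
	log.Printf("scan range: %q .. %q", start, end) // "ebook/" .. "ebook0"
}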