Newer
Older
"encoding/gob"
"git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve"
_ "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/analyzers/custom_analyzer"
_ "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/analyzers/keyword_analyzer"
_ "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/analyzers/simple_analyzer"
_ "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/analyzers/standard_analyzer"
_ "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/token_filters/edge_ngram_filter"
blevegoleveldb "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/goleveldb"
"git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb"
ldbfilter "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter"
ldbiter "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator"
ldbopt "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt"
ldbutil "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util"
// Bucket name prefixes: every leveldb key has the form
// "<bucket>/<raw key>" (see bktToKey).
// NOTE(review): the enclosing `var (` opener is missing from this copy.
BookBucket = []byte("ebook")
FileBucket = []byte("file")
// BookFileBucket holds the reverse book-id/path -> path mapping
// scanned by GetBookFiles.
BookFileBucket = []byte("ebook_file")
// keySeparator joins the bucket prefix and the raw key.
keySeparator = byte('/')
)
// BookId uniquely identifies a book in the database.
type BookId uint64

// String renders the id in base 10, as used for bleve document ids.
func (id BookId) String() string {
	return strconv.FormatUint(uint64(id), 10)
}

// Key encodes the id as 8 little-endian bytes, suitable as a
// leveldb key component.
func (id BookId) Key() []byte {
	out := make([]byte, 8)
	binary.LittleEndian.PutUint64(out, uint64(id))
	return out
}
// Book is the stored record for one ebook: a random identifier, an
// optional cover image path, and the parsed metadata.
type Book struct {
	// Id is a randomly generated id (see NewID).
	Id BookId
	// CoverPath is the path of the cover image, if any.
	CoverPath string
	// Metadata is the parsed book metadata (project-defined type).
	Metadata *Metadata
}
// String renders the book as "<metadata> (<id>)", for logging.
func (b *Book) String() string {
	meta := b.Metadata.String()
	id := b.Id.String()
	return fmt.Sprintf("%s (%s)", meta, id)
}
// File is the stored record for one on-disk file belonging to a book.
// A single Book may own several Files (e.g. different formats).
type File struct {
	Path     string
	FileType string
	// Mtime and Size support cheap change detection (see HasChanged).
	Mtime time.Time
	Size  int64
	// Error records that processing this file failed previously.
	Error bool
	// Id of the Book this file belongs to.
	Id BookId
}
// HasChanged reports whether the on-disk file differs from the stored
// record, comparing modification time and size.
func (f *File) HasChanged(info os.FileInfo) bool {
	sameTime := info.ModTime().Equal(f.Mtime)
	sameSize := info.Size() == f.Size
	return !(sameTime && sameSize)
}
// init seeds the math/rand generator (used by NewID) with a
// cryptographically random value.
func init() {
	var seed int64
	if err := binary.Read(cryptorand.Reader, binary.LittleEndian, &seed); err != nil {
		// crypto/rand should never fail, but a zero seed would make
		// NewID fully predictable; fall back to a time-based seed.
		seed = time.Now().UnixNano()
	}
	rand.Seed(seed)
}
// NewID generates a fresh random book id (non-negative 63-bit value).
func NewID() BookId {
	n := rand.Int63()
	return BookId(n)
}
// ParseID parses a base-10 string back into a BookId. Malformed input
// deliberately maps to the zero id (the parse error is dropped).
func ParseID(s string) BookId {
	value, _ := strconv.ParseUint(s, 10, 64)
	return BookId(value)
}
// flatBook is the flattened representation of a Book that actually
// gets indexed by bleve. The "_unique" and "_suggest" fields feed the
// keyword and edge-ngram analyzers respectively.
type flatBook struct {
	Title       string   `json:"title"`
	Author      []string `json:"author"`
	Description string   `json:"description"`
	ISBN        []string `json:"isbn"`
	Unique      []string `json:"_unique"`
	Suggest     []string `json:"_suggest"`
}

// Type selects the "ebook" document mapping in the bleve index.
func (f *flatBook) Type() string {
	return "ebook"
}
// flatten converts a Book into the flatBook form handed to the indexer,
// seeding the suggestion field with the title and creators.
//
// NOTE(review): this copy of the function is truncated — the struct
// literal below is missing (at least) its Suggest field, the closing
// brace of the literal, and the function's closing brace.
func flatten(book *Book) *flatBook {
	suggest := []string{book.Metadata.Title}
	if len(book.Metadata.Creator) > 0 {
		suggest = append(suggest, book.Metadata.Creator...)
	}
	return &flatBook{
		Title:       book.Metadata.Title,
		Author:      book.Metadata.Creator,
		Description: book.Metadata.Description,
		ISBN:        book.Metadata.ISBN,
		Unique:      book.Metadata.Uniques(),
// metadataDocumentMapping builds the bleve document mapping for the
// "ebook" type: non-stored text fields for free text, keyword fields
// for exact identifiers, and an edge-ngram field for autocomplete.
func metadataDocumentMapping() *bleve.DocumentMapping {
	doc := bleve.NewDocumentMapping()

	text := bleve.NewTextFieldMapping()
	text.Store = false
	text.Analyzer = defaultTextAnalyzer

	author := bleve.NewTextFieldMapping()
	author.Store = false

	keyword := bleve.NewTextFieldMapping()
	keyword.Store = false
	keyword.Analyzer = "keyword"
	keyword.IncludeInAll = false

	suggest := bleve.NewTextFieldMapping()
	suggest.Store = false
	suggest.Analyzer = "edgeNgram"
	suggest.IncludeTermVectors = false
	suggest.IncludeInAll = false

	doc.AddFieldMappingsAt("title", text)
	doc.AddFieldMappingsAt("author", author)
	doc.AddFieldMappingsAt("description", text)
	doc.AddFieldMappingsAt("isbn", keyword)
	doc.AddFieldMappingsAt("_unique", keyword)
	doc.AddFieldMappingsAt("_suggest", suggest)
	return doc
}
// defaultIndexMapping builds the bleve index mapping, registering a
// custom "edgeNgram" analyzer (3-to-25-character edge ngrams over
// lowercased, English-stopword-filtered unicode tokens) used for the
// autocomplete "_suggest" field.
func defaultIndexMapping() *bleve.IndexMapping {
	i := bleve.NewIndexMapping()
	err := i.AddCustomTokenFilter("edgeNgram325",
		map[string]interface{}{
			"type": "edge_ngram",
			"min":  3.0,
			"max":  25.0,
		})
	if err != nil {
		// Mapping setup failure is a programming error; abort.
		log.Fatal(err)
	}
	err = i.AddCustomAnalyzer("edgeNgram",
		map[string]interface{}{
			"type":      "custom",
			"tokenizer": "unicode",
			"token_filters": []interface{}{
				"to_lower",
				"stop_en",
				"edgeNgram325",
			},
		})
	if err != nil {
		log.Fatal(err)
	}
	i.DefaultAnalyzer = defaultTextAnalyzer
	i.DefaultType = "ebook"
	// NOTE(review): this copy is garbled — the function's `return i`
	// and the opening of the Database struct declaration (which, from
	// usage elsewhere, has at least `path string` and `ldb *leveldb.DB`
	// fields) were lost; only the index field and a brace remain.
	index bleve.Index
}
// NewDb opens (or creates) a Database rooted at path, containing a
// leveldb store under "db/" and a bleve search index under "index/".
func NewDb(path string) (*Database, error) {
	// Make sure that path exists.
	if _, err := os.Stat(path); err != nil {
		if err := os.Mkdir(path, 0700); err != nil {
			return nil, err
		}
	}
	// Fix: `d` was used below but its declaration was lost in this
	// copy; restore the Database construction (path field is read by
	// Reindex).
	d := &Database{path: path}
	// Initialize our database and the index.
	if err := d.setupLevelDb(filepath.Join(path, "db")); err != nil {
		return nil, err
	}
	if err := d.setupIndex(filepath.Join(path, "index")); err != nil {
		return nil, err
	}
	return d, nil
}
// setupLevelDb opens the leveldb key/value store at path and keeps the
// handle on db.ldb.
func (db *Database) setupLevelDb(path string) error {
	// Use a small Bloom filter to cut disk reads on missing keys.
	opts := &ldbopt.Options{
		Filter: ldbfilter.NewBloomFilter(10),
	}
	ldb, err := leveldb.OpenFile(path, opts)
	if err != nil {
		return err
	}
	// Fix: the opened handle was previously dropped (err unchecked and
	// db.ldb never assigned), so every later db.ldb use would nil-panic.
	db.ldb = ldb
	return nil
}
// setupIndex opens the bleve index at path, creating it (backed by
// goleveldb) when it does not exist yet.
func (db *Database) setupIndex(path string) error {
	var err error
	if _, serr := os.Stat(path); serr == nil {
		// An index already exists on disk: just open it.
		db.index, err = bleve.Open(path)
	} else {
		// Create a new Bleve index, backed by goleveldb.
		storeOpts := map[string]interface{}{
			"create_if_missing":         true,
			"write_buffer_size":         2 << 25,
			"lru_cache_capacity":        2 << 27,
			"bloom_filter_bits_per_key": 10,
		}
		db.index, err = bleve.NewUsing(path, defaultIndexMapping(), bleve.Config.DefaultIndexType, blevegoleveldb.Name, storeOpts)
	}
	return err
}
// Close releases the search index and the underlying leveldb store.
func (db *Database) Close() {
	db.index.Close()
	// Fix: the leveldb handle was previously never closed, leaking the
	// store (and its lock file) on shutdown.
	db.ldb.Close()
}
// GetBook loads the Book record stored under the given id.
func (db *Database) GetBook(bookid BookId) (*Book, error) {
	book := new(Book)
	if err := db.Get(BookBucket, bookid.Key(), book); err != nil {
		return nil, err
	}
	return book, nil
}
// GetFile loads the File record keyed by its on-disk path.
func (db *Database) GetFile(path string) (*File, error) {
	file := new(File)
	if err := db.Get(FileBucket, []byte(path), file); err != nil {
		return nil, err
	}
	return file, nil
}
// GetBookFiles returns the File records associated with a book, by
// iterating the BookFileBucket key range for this book id.
//
// NOTE(review): this copy is garbled — the declaration of the `out`
// result slice, the `for it.Next() {` loop header, the `filepath`
// string variable, and the final `return out, nil` were lost.
func (db *Database) GetBookFiles(bookid BookId) ([]*File, error) {
	start, end := keyRange(bktToKey(BookFileBucket, bookid.Key()))
	it := db.ldb.NewIterator(
		&ldbutil.Range{
			Start: start,
			Limit: end,
		},
		nil,
	)
	defer it.Release()
	// Each value is a gob-encoded file path; undecodable or unloadable
	// entries are silently skipped.
	if gob.NewDecoder(bytes.NewReader(it.Value())).Decode(&filepath) == nil {
		if file, err := db.GetFile(filepath); err == nil {
			out = append(out, file)
		}
	}
	}
	if err := it.Error(); err != nil {
		return nil, err
	}
// Get loads the gob-encoded value stored under <bucket>/<key> and
// decodes it into obj. Returns the leveldb error (e.g. not-found) or
// the gob decoding error.
func (db *Database) Get(bucket, key []byte, obj interface{}) error {
	data, err := db.ldb.Get(bktToKey(bucket, key), nil)
	// Fix: the lookup error was previously ignored, so a missing key
	// fell through to decoding a nil buffer.
	if err != nil {
		return err
	}
	return gob.NewDecoder(bytes.NewReader(data)).Decode(obj)
}
// PutBook stores a Book record under its id.
//
// NOTE(review): truncated copy — the tail of this function is missing
// (it presumably also indexed the book via db.index and returned).
func (db *Database) PutBook(b *Book) error {
	if err := db.Put(BookBucket, b.Id.Key(), b); err != nil {
		return err
	}
// fileBookKey builds the BookFileBucket sub-key "<id-key>/<path>",
// grouping all of a book's file paths under one iterable prefix.
func fileBookKey(path string, bookid BookId) []byte {
	key := append([]byte{}, bookid.Key()...)
	key = append(key, keySeparator)
	key = append(key, path...)
	return key
}
// NOTE(review): orphaned body — the enclosing function header (by the
// calls below, something like `func (db *Database) PutFile(f *File)
// error`) was lost in this copy. It stores the File record and, for
// files without errors, the reverse book-id/path mapping used by
// GetBookFiles.
if err := db.Put(FileBucket, []byte(f.Path), f); err != nil {
	return err
}
if !f.Error {
	// Record the book-id -> path association.
	return db.Put(BookFileBucket, fileBookKey(f.Path, f.Id), f.Path)
}
return nil
// RawPut writes a raw key/value pair to the leveldb store.
// NOTE(review): the body of RawPut was lost in this copy.
func (db *Database) RawPut(key, value []byte) error {
// Put gob-encodes obj and stores it under <bucket>/<key> via RawPut.
// NOTE(review): garbled — the success-path `return db.RawPut(...)` has
// been folded into the encode-error branch; as written the function
// stores the value only when encoding FAILS and falls off the end
// otherwise. An `if err != nil { return err }` line appears lost.
func (db *Database) Put(bucket, key []byte, obj interface{}) error {
	var buf bytes.Buffer
	if err := gob.NewEncoder(&buf).Encode(obj); err != nil {
		return db.RawPut(bktToKey(bucket, key), buf.Bytes())
	}
}
// DeleteBook removes a book record and its search-index entry. The
// leveldb deletion result is ignored; the index error is returned.
func (db *Database) DeleteBook(id BookId) error {
	db.Delete(BookBucket, id.Key())
	return db.index.Delete(id.String())
}
// DeleteFile removes a file record and its book association; when the
// owning book has no files left, the book itself is deleted too.
// Unknown paths are a no-op (nil is returned).
func (db *Database) DeleteFile(path string) error {
	file, err := db.GetFile(path)
	if err != nil {
		// Nothing stored for this path.
		return nil
	}
	db.Delete(FileBucket, []byte(path))
	db.Delete(BookFileBucket, fileBookKey(path, file.Id))
	// Delete the book if there are no files left.
	remaining, lerr := db.GetBookFiles(file.Id)
	if lerr == nil && len(remaining) == 0 {
		db.DeleteBook(file.Id)
	}
	return nil
}
return db.ldb.Delete(bktToKey(bucket, key), nil)
// Close releases the underlying leveldb iterator and reports any error
// accumulated while scanning.
// NOTE(review): the method's closing brace was lost in this copy.
func (i *DatabaseIterator) Close() error {
	defer i.iter.Release()
	return i.iter.Error()
// Next advances the iterator; false means the scan is finished.
func (i *DatabaseIterator) Next() bool {
	return i.iter.Next()
}

// Id decodes the BookId embedded in the current key (see keyToId).
func (i *DatabaseIterator) Id() BookId {
	return keyToId(i.iter.Key())
}

// RawKey returns the current full key, bucket prefix included.
func (i *DatabaseIterator) RawKey() []byte {
	return i.iter.Key()
}

// RawValue returns the current value bytes without decoding them.
func (i *DatabaseIterator) RawValue() []byte {
	return i.iter.Value()
}
// NOTE(review): orphaned body — the enclosing method header (likely
// `func (i *DatabaseIterator) Value(obj interface{}) error {`, given
// the i.Value(&book) call in Reindex) was lost in this copy. It
// gob-decodes the current value into obj.
return gob.NewDecoder(bytes.NewReader(i.iter.Value())).Decode(obj)
}
// Scan an entire bucket, returning an iterator over its key range.
// The caller is expected to Close() the returned iterator.
// NOTE(review): the function's closing brace was lost in this copy.
func (db *Database) Scan(bucket []byte) *DatabaseIterator {
	start, end := keyRange(bucket)
	it := db.ldb.NewIterator(&ldbutil.Range{
		Start: start,
		Limit: end,
	}, &ldbopt.ReadOptions{
		// Bulk scans should not evict hot entries from the block cache.
		DontFillCache: true,
	})
	return &DatabaseIterator{iter: it}
// Reindex the entire database. This is an administrative operation,
// to be performed after an incompatible index schema change. It will
// delete the existing index and re-create it from scratch.
//
// NOTE(review): garbled copy — the scan-loop header (presumably
// iterating db.Scan(BookBucket) with i.Next()) and the function's
// final return/closing brace are missing below.
func (db *Database) Reindex() error {
	// Close the index, delete it, and re-open it.
	db.index.Close()
	indexPath := filepath.Join(db.path, "index")
	if err := os.RemoveAll(indexPath); err != nil {
		return err
	}
	if err := db.setupIndex(indexPath); err != nil {
		return err
	}
	// Scan the database and re-index everything.
	var book Book
	// Records that fail to decode are skipped rather than aborting.
	if err := i.Value(&book); err != nil {
		continue
	}
	db.index.Index(i.Id().String(), flatten(&book))
}
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
// SearchResult holds one page of resolved Books plus the total number
// of index hits; NumResults may exceed len(Results) (pagination, or
// hits whose record failed to load).
type SearchResult struct {
	Results    []*Book
	NumResults int
}
// doSearch runs a bleve query and resolves each hit to its stored Book
// record. Hits whose record cannot be loaded are silently dropped, so
// len(Results) can be smaller than NumResults.
func (db *Database) doSearch(query bleve.Query, offset, limit int) (*SearchResult, error) {
	req := bleve.NewSearchRequestOptions(query, limit, offset, false)
	res, err := db.index.Search(req)
	if err != nil {
		return nil, err
	}
	out := &SearchResult{NumResults: int(res.Total)}
	for _, hit := range res.Hits {
		book, gerr := db.GetBook(ParseID(hit.ID))
		if gerr != nil {
			continue
		}
		out.Results = append(out.Results, book)
	}
	return out, nil
}
// Search the database with a query string (bleve query-string syntax),
// returning the requested result page.
func (db *Database) Search(queryStr string, offset, limit int) (*SearchResult, error) {
	q := bleve.NewQueryStringQuery(queryStr)
	return db.doSearch(q, offset, limit)
}
// Autocomplete runs a fuzzy search for a term, matching it against the
// edge-ngram-analyzed "_suggest" field; results are capped at 20.
// NOTE(review): the function's closing brace was lost in this copy.
func (db *Database) Suggest(term string) (*SearchResult, error) {
	query := bleve.NewTermQuery(term).SetField("_suggest")
	return db.doSearch(query, 0, 20)
// Find looks up a book matching any of the given unique identifiers,
// via term queries on the keyword-analyzed "_unique" field.
//
// NOTE(review): garbled copy — the construction of `query` (likely a
// disjunction of `queries`, plus an error return when uniqueIds is
// empty) is missing from the `if` below, and the function is cut off
// right after the no-match check.
func (db *Database) Find(uniqueIds []string) (*Book, error) {
	var queries []bleve.Query
	for _, u := range uniqueIds {
		queries = append(queries, bleve.NewTermQuery(u).SetField("_unique"))
	}
	if len(queries) > 0 {
	}
	search := bleve.NewSearchRequest(query)
	result, err := db.index.Search(search)
	if err != nil {
		return nil, err
	}
	if len(result.Hits) == 0 {
		return nil, errors.New("no matches found")
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
}
func bktToKey(bucket, key []byte) []byte {
return bytes.Join([][]byte{bucket, key}, []byte{keySeparator})
}
// Input is a full key (including bucket). Extracts the BookId encoded
// after the first separator; returns 0 when no separator is present
// or the remainder is too short to decode (binary.Read error ignored,
// leaving id at its zero value).
func keyToId(key []byte) BookId {
	n := bytes.Index(key, []byte{keySeparator})
	if n < 0 {
		return 0
	}
	var id uint64
	binary.Read(bytes.NewReader(key[n+1:]), binary.LittleEndian, &id)
	return BookId(id)
}
// keyRange returns the [start, end) leveldb key range covering every
// key of the form "<prefix>/<anything>": start is prefix plus the
// separator, end is prefix plus the next byte after the separator.
func keyRange(prefix []byte) ([]byte, []byte) {
	start := append(append([]byte{}, prefix...), keySeparator)
	end := append(append([]byte{}, prefix...), keySeparator+1)
	return start, end
}