package liber

import (
	"bytes"
	cryptorand "crypto/rand"
	"encoding/binary"
	"encoding/gob"
	"errors"
	"fmt"
	"log"
	"math/rand"
	"os"
	"path/filepath"
	"strconv"
	"time"

	"git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve"
	_ "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/analyzers/custom_analyzer"
	_ "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/analyzers/keyword_analyzer"
	_ "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/analyzers/simple_analyzer"
	_ "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/analyzers/standard_analyzer"
	_ "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/token_filters/edge_ngram_filter"
	blevegoleveldb "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/goleveldb"

	"git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb"
	ldbfilter "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter"
	ldbiter "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator"
	ldbopt "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt"
	ldbutil "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util"
)

var (
	BookBucket     = []byte("ebook")
	FileBucket     = []byte("file")
	BookFileBucket = []byte("ebook_file")

	keySeparator = byte('/')
)

type BookId uint64

func (id BookId) String() string {
	return strconv.FormatUint(uint64(id), 10)
}

func (id BookId) Key() []byte {
	var buf bytes.Buffer
	binary.Write(&buf, binary.LittleEndian, id)
	return buf.Bytes()
}

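// Book ties a unique identifier to the parsed metadata of an ebook and,
// optionally, the path of its cover image.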
type Book struct {
	Id        BookId
	CoverPath string
	Metadata  *Metadata
}

func (b *Book) String() string {
	return fmt.Sprintf("%s (%s)", b.Metadata.String(), b.Id.String())
}

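// File describes a single file on disk, the book it belongs to, and the
// modification time and size that HasChanged uses to detect updates.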
type File struct {
	Path     string
	FileType string
	Mtime    time.Time
	Size     int64
	Error    bool
	Id       BookId
}

func (f *File) HasChanged(info os.FileInfo) bool {
	return !info.ModTime().Equal(f.Mtime) || info.Size() != f.Size
}

func init() {
	// Seed the math/rand RNG with a cryptographically random value.
	var seed int64
	binary.Read(cryptorand.Reader, binary.LittleEndian, &seed)
	rand.Seed(seed)
}

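// NewID returns a new random book identifier.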
func NewID() BookId {
	return BookId(rand.Int63())
}

func ParseID(s string) BookId {
	id, _ := strconv.ParseUint(s, 10, 64)
	return BookId(id)
}

// flatBook is the flattened form of the book metadata that actually gets
// indexed.
type flatBook struct {
	Title       string   `json:"title"`
	Author      []string `json:"author"`
	Description string   `json:"description"`
	ISBN        []string `json:"isbn"`
	Unique      []string `json:"_unique"`
	Suggest     []string `json:"_suggest"`
}

func (f *flatBook) Type() string {
	return "ebook"
}

func flatten(book *Book) *flatBook {
	suggest := []string{book.Metadata.Title}
	if len(book.Metadata.Creator) > 0 {
		suggest = append(suggest, book.Metadata.Creator...)
	}
	return &flatBook{
		Title:       book.Metadata.Title,
		Author:      book.Metadata.Creator,
		Description: book.Metadata.Description,
		ISBN:        book.Metadata.ISBN,
		Unique:      book.Metadata.Uniques(),
		Suggest:     suggest,
	}
}

var defaultTextAnalyzer = "standard"

func metadataDocumentMapping() *bleve.DocumentMapping {
	md := bleve.NewDocumentMapping()

	textFieldMapping := bleve.NewTextFieldMapping()
	textFieldMapping.Store = false
	textFieldMapping.Analyzer = defaultTextAnalyzer

	authorFieldMapping := bleve.NewTextFieldMapping()
	authorFieldMapping.Store = false
	authorFieldMapping.Analyzer = "simple"

	keywordFieldMapping := bleve.NewTextFieldMapping()
	keywordFieldMapping.Store = false
	keywordFieldMapping.Analyzer = "keyword"
	keywordFieldMapping.IncludeInAll = false

	suggestFieldMapping := bleve.NewTextFieldMapping()
	suggestFieldMapping.Store = false
	suggestFieldMapping.Analyzer = "edgeNgram"
	suggestFieldMapping.IncludeTermVectors = false
	suggestFieldMapping.IncludeInAll = false

	md.AddFieldMappingsAt("title", textFieldMapping)
	md.AddFieldMappingsAt("author", authorFieldMapping)
	md.AddFieldMappingsAt("description", textFieldMapping)
	md.AddFieldMappingsAt("isbn", keywordFieldMapping)
	md.AddFieldMappingsAt("_unique", keywordFieldMapping)
	md.AddFieldMappingsAt("_suggest", suggestFieldMapping)

	return md
}

func defaultIndexMapping() *bleve.IndexMapping {
	i := bleve.NewIndexMapping()

	err := i.AddCustomTokenFilter("edgeNgram325",
		map[string]interface{}{
			"type": "edge_ngram",
			"min":  3.0,
			"max":  25.0,
		})
	if err != nil {
		log.Fatal(err)
	}

	err = i.AddCustomAnalyzer("edgeNgram",
		map[string]interface{}{
			"type":      "custom",
			"tokenizer": "unicode",
			"token_filters": []interface{}{
				"to_lower",
				"stop_en",
				"edgeNgram325",
			},
		})
	if err != nil {
		log.Fatal(err)
	}

	i.AddDocumentMapping("ebook", metadataDocumentMapping())

	i.DefaultAnalyzer = defaultTextAnalyzer
	i.DefaultType = "ebook"
	return i
}
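
// A minimal usage sketch of the Database type below (the path and the
// *Metadata value `meta` are hypothetical placeholders):
//
//	db, err := NewDb("/path/to/library")
//	if err != nil {
//		log.Fatal(err)
//	}
//	defer db.Close()
//
//	if err := db.PutBook(&Book{Id: NewID(), Metadata: meta}); err != nil {
//		log.Fatal(err)
//	}
//
//	res, err := db.Search("some query", 0, 20)
//	if err == nil {
//		for _, b := range res.Results {
//			log.Println(b)
//		}
//	}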

type Database struct {
	path  string
	ldb   *leveldb.DB
	index bleve.Index
}

func NewDb(path string) (*Database, error) {
	// Make sure that path exists.
	if _, err := os.Stat(path); err != nil {
		if err := os.Mkdir(path, 0700); err != nil {
			return nil, err
		}
	}

	// Initialize our database and the index.
	d := &Database{path: path}
	if err := d.setupLevelDb(filepath.Join(path, "db")); err != nil {
		return nil, err
	}
	if err := d.setupIndex(filepath.Join(path, "index")); err != nil {
		return nil, err
	}
	return d, nil
}

func (db *Database) setupLevelDb(path string) error {
	// Use a large block cache (256MB) and a small Bloom filter.
	opts := &ldbopt.Options{
		Filter:             ldbfilter.NewBloomFilter(10),
		BlockCacheCapacity: 2 << 27,
	}

	ldb, err := leveldb.OpenFile(path, opts)
	if err != nil {
		return err
	}
	db.ldb = ldb
	return nil
}

func (db *Database) setupIndex(path string) error {
	var err error
	if _, serr := os.Stat(path); serr == nil {
		db.index, err = bleve.Open(path)
	} else {
		// Create a new Bleve index, backed by goleveldb.
		db.index, err = bleve.NewUsing(path, defaultIndexMapping(), bleve.Config.DefaultIndexType, blevegoleveldb.Name, map[string]interface{}{
			"create_if_missing":         true,
			"write_buffer_size":         2 << 25,
			"lru_cache_capacity":        2 << 27,
			"bloom_filter_bits_per_key": 10,
		})
	}
	return err
}

func (db *Database) Close() {
	db.index.Close()
	db.ldb.Close()
}

func (db *Database) GetBook(bookid BookId) (*Book, error) {
	var b Book
	if err := db.Get(BookBucket, bookid.Key(), &b); err != nil {
		return nil, err
	}
	return &b, nil
}

func (db *Database) GetFile(path string) (*File, error) {
	var f File
	if err := db.Get(FileBucket, []byte(path), &f); err != nil {
		return nil, err
	}
	return &f, nil
}

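// GetBookFiles returns all the files associated with the given book.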
func (db *Database) GetBookFiles(bookid BookId) ([]*File, error) {
	start, end := keyRange(bktToKey(BookFileBucket, bookid.Key()))
	it := db.ldb.NewIterator(
		&ldbutil.Range{
			Start: start,
			Limit: end,
		},
		nil,
	)
	defer it.Release()

	var out []*File
	for it.Next() {
		// Values in this bucket are gob-encoded file paths.
		var fpath string
		if gob.NewDecoder(bytes.NewReader(it.Value())).Decode(&fpath) == nil {
			if file, err := db.GetFile(fpath); err == nil {
				out = append(out, file)
			}
		}
	}
	if err := it.Error(); err != nil {
		return nil, err
	}
	return out, nil
}

func (db *Database) Get(bucket, key []byte, obj interface{}) error {
	data, err := db.ldb.Get(bktToKey(bucket, key), nil)
	if err != nil {
		return err
	}
	return gob.NewDecoder(bytes.NewReader(data)).Decode(obj)
}

func (db *Database) PutBook(b *Book) error {
	if err := db.Put(BookBucket, b.Id.Key(), b); err != nil {
		return err
	}
	return db.index.Index(b.Id.String(), flatten(b))
}

func fileBookKey(path string, bookid BookId) []byte {
	return bytes.Join([][]byte{bookid.Key(), []byte(path)}, []byte{keySeparator})
}

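// PutFile stores a file record and, unless the file is flagged with an
// error, the book-to-file mapping that GetBookFiles relies on.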
func (db *Database) PutFile(f *File) error {
	if err := db.Put(FileBucket, []byte(f.Path), f); err != nil {
		return err
	}
	if !f.Error {
		return db.Put(BookFileBucket, fileBookKey(f.Path, f.Id), f.Path)
	}
	return nil
}

func (db *Database) RawPut(key, value []byte) error {
	return db.ldb.Put(key, value, nil)
}

func (db *Database) Put(bucket, key []byte, obj interface{}) error {
	var buf bytes.Buffer
	if err := gob.NewEncoder(&buf).Encode(obj); err != nil {
		return err
	}
	return db.RawPut(bktToKey(bucket, key), buf.Bytes())
}

func (db *Database) DeleteBook(bookid BookId) error {
	db.Delete(BookBucket, bookid.Key())
	return db.index.Delete(bookid.String())
}

func (db *Database) DeleteFile(path string) error {
	f, err := db.GetFile(path)
	if err != nil {
		// Unknown file: nothing to delete.
		return nil
	}

	db.Delete(FileBucket, []byte(path))
	db.Delete(BookFileBucket, fileBookKey(path, f.Id))

	// Delete the book if there are no files left.
	if files, err := db.GetBookFiles(f.Id); err == nil && len(files) == 0 {
		db.DeleteBook(f.Id)
	}

	return nil
}

func (db *Database) Delete(bucket, key []byte) error {
	return db.ldb.Delete(bktToKey(bucket, key), nil)
}

// DatabaseIterator wraps a LevelDB iterator, decoding keys and values on
// demand.
type DatabaseIterator struct {
	iter ldbiter.Iterator
}

func (i *DatabaseIterator) Close() error {
	defer i.iter.Release()
	return i.iter.Error()
}

func (i *DatabaseIterator) Next() bool {
	return i.iter.Next()
}

func (i *DatabaseIterator) Id() BookId {
	return keyToId(i.iter.Key())
}

func (i *DatabaseIterator) RawKey() []byte {
	return i.iter.Key()
}

func (i *DatabaseIterator) RawValue() []byte {
	return i.iter.Value()
}

func (i *DatabaseIterator) Value(obj interface{}) error {
	return gob.NewDecoder(bytes.NewReader(i.iter.Value())).Decode(obj)
}

// Scan an entire bucket.
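// The returned iterator must be closed after use. A minimal sketch,
// mirroring what Reindex does below:
//
//	it := db.Scan(BookBucket)
//	for it.Next() {
//		var book Book
//		if err := it.Value(&book); err != nil {
//			continue
//		}
//		// use it.Id() and book here
//	}
//	if err := it.Close(); err != nil {
//		// handle the iteration error
//	}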
func (db *Database) Scan(bucket []byte) *DatabaseIterator {
	start, end := keyRange(bucket)
	it := db.ldb.NewIterator(&ldbutil.Range{
		Start: start,
		Limit: end,
	}, &ldbopt.ReadOptions{
		DontFillCache: true,
	})

	return &DatabaseIterator{iter: it}
}

// Reindex the entire database. This is an administrative operation,
// to be performed after an incompatible index schema change. It will
// delete the existing index and re-create it from scratch.
func (db *Database) Reindex() error {
	// Close the index, delete it, and re-open it.
	db.index.Close()

	indexPath := filepath.Join(db.path, "index")
	if err := os.RemoveAll(indexPath); err != nil {
		return err
	}
	if err := db.setupIndex(indexPath); err != nil {
		return err
	}

	// Scan the database and re-index everything.
	i := db.Scan(BookBucket)
	for i.Next() {
		var book Book
		if err := i.Value(&book); err != nil {
			continue
		}
		db.index.Index(i.Id().String(), flatten(&book))
	}
	return i.Close()
}

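// SearchResult holds one page of results and the total number of matches.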
type SearchResult struct {
	Results    []*Book
	NumResults int
}

func (db *Database) doSearch(query bleve.Query, offset, limit int) (*SearchResult, error) {
	req := bleve.NewSearchRequestOptions(query, limit, offset, false)
	result, err := db.index.Search(req)
	if err != nil {
		return nil, err
	}

	sr := SearchResult{NumResults: int(result.Total)}
	for _, r := range result.Hits {
		if book, err := db.GetBook(ParseID(r.ID)); err == nil {
			sr.Results = append(sr.Results, book)
		}
	}
	return &sr, nil
}

// Search the database with a query string.
func (db *Database) Search(queryStr string, offset, limit int) (*SearchResult, error) {
	return db.doSearch(bleve.NewQueryStringQuery(queryStr), offset, limit)
}

// Suggest returns autocomplete-style suggestions for a term, matching
// prefixes (edge n-grams) of the indexed _suggest field.
func (db *Database) Suggest(term string) (*SearchResult, error) {
	query := bleve.NewTermQuery(term).SetField("_suggest")
	return db.doSearch(query, 0, 20)
}

// Find a book matching any of the given unique identifiers, if possible.
func (db *Database) Find(uniqueIds []string) (*Book, error) {
	var queries []bleve.Query
	var query bleve.Query

	for _, u := range uniqueIds {
		queries = append(queries, bleve.NewTermQuery(u).SetField("_unique"))
	}
	if len(queries) == 0 {
		return nil, errors.New("no unique identifiers")
	}
	if len(queries) > 1 {
		query = bleve.NewDisjunctionQuery(queries)
	} else {
		query = queries[0]
	}

	search := bleve.NewSearchRequest(query)
	result, err := db.index.Search(search)
	if err != nil {
		return nil, err
	}
	if len(result.Hits) == 0 {
		return nil, errors.New("no matches found")
	}

	return db.GetBook(ParseID(result.Hits[0].ID))
}
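
// On-disk key layout: every record is stored under <bucket> + '/' + <key>
// (see bktToKey). Books therefore live at "ebook/<id>", file records at
// "file/<path>", and the book-to-file mapping at "ebook_file/<id>/<path>",
// with ids serialized as little-endian uint64.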

func bktToKey(bucket, key []byte) []byte {
	return bytes.Join([][]byte{bucket, key}, []byte{keySeparator})
}

// Input is a full key (including bucket).
func keyToId(key []byte) BookId {
	n := bytes.Index(key, []byte{keySeparator})
	if n < 0 {
		return 0
	}

	var id uint64
	binary.Read(bytes.NewReader(key[n+1:]), binary.LittleEndian, &id)
	return BookId(id)
}

// keyRange returns the [start, end) key bounds that cover every key in the
// given bucket.
func keyRange(prefix []byte) ([]byte, []byte) {
	start := make([]byte, len(prefix)+1)
	end := make([]byte, len(prefix)+1)
	copy(start, prefix)
	copy(end, prefix)
	start[len(prefix)] = keySeparator
	end[len(prefix)] = keySeparator + 1
	return start, end
}