diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json index 724c57b379eb0597e18375d25d7e6ad126c61769..c3c4cf0b8ff4b5c0d38a373c1f9fe0454b906a74 100644 --- a/Godeps/Godeps.json +++ b/Godeps/Godeps.json @@ -7,7 +7,7 @@ "Deps": [ { "ImportPath": "github.com/blevesearch/bleve", - "Rev": "1006bf54b4782b0644e66a600c27e80652937d91" + "Rev": "0b171c85da0922a0baf5fd559ec8515dd35cadb8" }, { "ImportPath": "github.com/blevesearch/go-porterstemmer", @@ -20,8 +20,8 @@ }, { "ImportPath": "github.com/boltdb/bolt", - "Comment": "v1.1.0-67-g2f846c3", - "Rev": "2f846c3551b76d7710f159be840d66c3d064abbe" + "Comment": "v1.2.0-11-g831b652", + "Rev": "831b652a7f8dbefaf94da0eb66abd46c0c4bcf23" }, { "ImportPath": "github.com/golang/protobuf/proto", @@ -29,7 +29,7 @@ }, { "ImportPath": "github.com/golang/snappy", - "Rev": "c2359a1bd0bd4a2de4f1bd92ccd045fb60d0a994" + "Rev": "5f1c01d9f64b941dd9582c638279d046eda6ca31" }, { "ImportPath": "github.com/gorilla/context", @@ -53,7 +53,7 @@ }, { "ImportPath": "github.com/syndtr/goleveldb/leveldb", - "Rev": "e7e6f5b5ef25adb580feac515f9ccec514d0bda8" + "Rev": "93fc893f2dadb96ffde441c7546cc67ea290a3a8" }, { "ImportPath": "github.com/willf/bitset", @@ -66,19 +66,19 @@ }, { "ImportPath": "golang.org/x/text/encoding", - "Rev": "07b9a78963006a15c538ec5175243979025fa7a8" + "Rev": "1b466db55e0ba5d56ef5315c728216b42f796491" }, { "ImportPath": "golang.org/x/text/internal/utf8internal", - "Rev": "07b9a78963006a15c538ec5175243979025fa7a8" + "Rev": "1b466db55e0ba5d56ef5315c728216b42f796491" }, { "ImportPath": "golang.org/x/text/runes", - "Rev": "07b9a78963006a15c538ec5175243979025fa7a8" + "Rev": "1b466db55e0ba5d56ef5315c728216b42f796491" }, { "ImportPath": "golang.org/x/text/transform", - "Rev": "07b9a78963006a15c538ec5175243979025fa7a8" + "Rev": "1b466db55e0ba5d56ef5315c728216b42f796491" } ] } diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/.gitignore b/Godeps/_workspace/src/github.com/blevesearch/bleve/.gitignore index 
63612403b842dcc18835a0261e62bc8d21947f23..4af963278a3e048b7dafcc25b789b6cee3e73d90 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/.gitignore +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/.gitignore @@ -4,6 +4,8 @@ .#* .project .settings +**/.idea/ +**/*.iml .DS_Store /analysis/token_filters/cld2/cld2-read-only /analysis/token_filters/cld2/libcld2_full.a diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/README.md b/Godeps/_workspace/src/github.com/blevesearch/bleve/README.md index 7fa2b7523e65717bd06e2f423a610e3979ed2c14..b6057af286b375d1f61cb0c3b9bbfeff31820c4e 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/README.md +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/README.md @@ -1,6 +1,6 @@ #  bleve -[](https://travis-ci.org/blevesearch/bleve) [](https://coveralls.io/r/blevesearch/bleve?branch=master) [](https://godoc.org/github.com/blevesearch/bleve) [](https://gitter.im/blevesearch/bleve?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) +[](https://travis-ci.org/blevesearch/bleve) [](https://coveralls.io/r/blevesearch/bleve?branch=master) [](https://godoc.org/github.com/blevesearch/bleve) [](https://gitter.im/blevesearch/bleve?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)[](https://codebeat.co/projects/github-com-blevesearch-bleve) modern text indexing in go - [blevesearch.com](http://www.blevesearch.com/) diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/analyzers/simple_analyzer/simple_analyzer.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/analyzers/simple_analyzer/simple_analyzer.go index 76c75922e368c00fde379408da985659c9c5da55..c7323238b177bc4103b62d39494a834354f54c71 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/analyzers/simple_analyzer/simple_analyzer.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/analyzers/simple_analyzer/simple_analyzer.go @@ 
-12,14 +12,14 @@ package simple_analyzer import ( "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis" "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter" - "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/tokenizers/unicode" + "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/tokenizers/letter" "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/registry" ) const Name = "simple" func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { - tokenizer, err := cache.TokenizerNamed(unicode.Name) + tokenizer, err := cache.TokenizerNamed(letter.Name) if err != nil { return nil, err } diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/language/en/analyzer_en.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/language/en/analyzer_en.go index 1f2f99e800a302f804da19a65a2ed09dae250abc..822a8529670ebce12f58343e1cdfa6a17a2ee085 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/language/en/analyzer_en.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/language/en/analyzer_en.go @@ -7,6 +7,13 @@ // either express or implied. See the License for the specific language governing permissions // and limitations under the License. +// Package en implements an analyzer with reasonable defaults for processing +// English text. +// +// It strips possessive suffixes ('s), transforms tokens to lower case, +// removes stopwords from a built-in list, and applies porter stemming. +// +// The built-in stopwords list is defined in EnglishStopWords. 
package en import ( diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/language/en/possessive_filter_en.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/language/en/possessive_filter_en.go index e017d2f25eef5706d082688dd15cb61bd59d9a2d..5bab035f3f1b0f349d5056f759e4d45b474a8e98 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/language/en/possessive_filter_en.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/language/en/possessive_filter_en.go @@ -16,6 +16,8 @@ import ( "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/registry" ) +// PossessiveName is the name PossessiveFilter is registered as +// in the bleve registry. const PossessiveName = "possessive_en" const rightSingleQuotationMark = '’' @@ -24,6 +26,11 @@ const fullWidthApostrophe = ''' const apostropheChars = rightSingleQuotationMark + apostrophe + fullWidthApostrophe +// PossessiveFilter implements a TokenFilter which +// strips the English possessive suffix ('s) from tokens. +// It handle a variety of apostrophe types, is case-insensitive +// and doesn't distinguish between possessive and contraction. +// (ie "She's So Rad" becomes "She So Rad") type PossessiveFilter struct { } diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/language/en/stop_words_en.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/language/en/stop_words_en.go index 1ccc8168ee0848299400ba7a9427f8dfc9bb5dda..f0c99ec22940774d78941749b58794d8eede5f00 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/language/en/stop_words_en.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/language/en/stop_words_en.go @@ -7,10 +7,11 @@ import ( const StopName = "stop_en" +// EnglishStopWords is the built-in list of stopwords used by the "stop_en" TokenFilter. 
+// // this content was obtained from: // lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/ // ` was changed to ' to allow for literal string - var EnglishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/english/stop.txt | This file is distributed under the BSD License. | See http://snowball.tartarus.org/license.php diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/language/ja/ja_morph_kagome.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/language/ja/ja_morph_kagome.go deleted file mode 100644 index 2c2b3aab28971ed5c81ff3dc9b151895899ca5b3..0000000000000000000000000000000000000000 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/language/ja/ja_morph_kagome.go +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. 
- -package ja - -import ( - "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis" - "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/registry" - - "github.com/ikawaha/kagome/tokenizer" -) - -const TokenizerName = "kagome" - -type KagomeMorphTokenizer struct { - tok tokenizer.Tokenizer -} - -func init() { - _ = tokenizer.SysDic() // prepare system dictionary -} - -func NewKagomeMorphTokenizer() *KagomeMorphTokenizer { - return &KagomeMorphTokenizer{ - tok: tokenizer.New(), - } -} - -func NewKagomeMorphTokenizerWithUserDic(userdic tokenizer.UserDic) *KagomeMorphTokenizer { - k := tokenizer.New() - k.SetUserDic(userdic) - return &KagomeMorphTokenizer{ - tok: k, - } -} - -func (t *KagomeMorphTokenizer) Tokenize(input []byte) analysis.TokenStream { - var ( - morphs []tokenizer.Token - prevstart int - ) - - rv := make(analysis.TokenStream, 0, len(input)) - if len(input) < 1 { - return rv - } - - morphs = t.tok.Analyze(string(input), tokenizer.Search) - - for i, m := range morphs { - if m.Surface == "EOS" || m.Surface == "BOS" { - continue - } - - surfacelen := len(m.Surface) - token := &analysis.Token{ - Term: []byte(m.Surface), - Position: i, - Start: prevstart, - End: prevstart + surfacelen, - Type: analysis.Ideographic, - } - - prevstart = prevstart + surfacelen - rv = append(rv, token) - } - - return rv -} - -func KagomeMorphTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) { - return NewKagomeMorphTokenizer(), nil -} - -func init() { - registry.RegisterTokenizer(TokenizerName, KagomeMorphTokenizerConstructor) -} diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter/lower_case_filter.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter/lower_case_filter.go index d9e166ae80665c1723418fd8ac7cb74041a7e973..7e51085d7b37b4c32e09eb844d54deca672b3f70 100644 --- 
a/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter/lower_case_filter.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter/lower_case_filter.go @@ -7,6 +7,8 @@ // either express or implied. See the License for the specific language governing permissions // and limitations under the License. +// Package lower_case_filter implements a TokenFilter which converts +// tokens to lower case according to unicode rules. package lower_case_filter import ( @@ -18,6 +20,7 @@ import ( "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/registry" ) +// Name is the name used to register LowerCaseFilter in the bleve registry const Name = "to_lower" type LowerCaseFilter struct { diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/token_map.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/token_map.go index 17a26f48995d2f2ae34f7815c950431bf04a0ba0..e2c23788dbc8ad30c1e18cbe6ed97ac9f41eb605 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/token_map.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/token_map.go @@ -23,6 +23,9 @@ func NewTokenMap() TokenMap { return make(TokenMap, 0) } +// LoadFile reads in a list of tokens from a text file, +// one per line. +// Comments are supported using `#` or `|` func (t TokenMap) LoadFile(filename string) error { data, err := ioutil.ReadFile(filename) if err != nil { @@ -31,6 +34,9 @@ func (t TokenMap) LoadFile(filename string) error { return t.LoadBytes(data) } +// LoadBytes reads in a list of tokens from memory, +// one per line. 
+// Comments are supported using `#` or `|` func (t TokenMap) LoadBytes(data []byte) error { bytesReader := bytes.NewReader(data) bufioReader := bufio.NewReader(bytesReader) diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/tokenizers/character/character.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/tokenizers/character/character.go new file mode 100644 index 0000000000000000000000000000000000000000..c47596b23c35272c5a1d69b0e28096ccfbcc56b8 --- /dev/null +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/tokenizers/character/character.go @@ -0,0 +1,71 @@ +// Copyright (c) 2016 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+ +package character + +import ( + "unicode/utf8" + + "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis" +) + +type IsTokenRune func(r rune) bool + +type CharacterTokenizer struct { + isTokenRun IsTokenRune +} + +func NewCharacterTokenizer(f IsTokenRune) *CharacterTokenizer { + return &CharacterTokenizer{ + isTokenRun: f, + } +} + +func (c *CharacterTokenizer) Tokenize(input []byte) analysis.TokenStream { + + rv := make(analysis.TokenStream, 0, 1024) + + offset := 0 + start := 0 + end := 0 + count := 0 + for currRune, size := utf8.DecodeRune(input[offset:]); currRune != utf8.RuneError; currRune, size = utf8.DecodeRune(input[offset:]) { + isToken := c.isTokenRun(currRune) + if isToken { + end = offset + size + } else { + if end-start > 0 { + // build token + rv = append(rv, &analysis.Token{ + Term: input[start:end], + Start: start, + End: end, + Position: count + 1, + Type: analysis.AlphaNumeric, + }) + count++ + } + start = offset + size + end = start + } + offset += size + } + // if we ended in the middle of a token, finish it + if end-start > 0 { + // build token + rv = append(rv, &analysis.Token{ + Term: input[start:end], + Start: start, + End: end, + Position: count + 1, + Type: analysis.AlphaNumeric, + }) + } + return rv +} diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/language/ja/analyzer_ja.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/tokenizers/letter/letter.go similarity index 56% rename from Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/language/ja/analyzer_ja.go rename to Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/tokenizers/letter/letter.go index f63eccf0442f5e2e25998f7c933772eecaaaa0e2..f68ecf5226bfdf862b5a603445a68b13edbf2aa6 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/language/ja/analyzer_ja.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/tokenizers/letter/letter.go @@ -1,4 +1,4 @@ -// 
Copyright (c) 2014 Couchbase, Inc. +// Copyright (c) 2016 Couchbase, Inc. // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file // except in compliance with the License. You may obtain a copy of the License at // http://www.apache.org/licenses/LICENSE-2.0 @@ -7,31 +7,22 @@ // either express or implied. See the License for the specific language governing permissions // and limitations under the License. -package ja +package letter import ( + "unicode" + "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis" - "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/token_filters/unicode_normalize" + "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/tokenizers/character" "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/registry" ) -const AnalyzerName = "ja" +const Name = "letter" -func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { - kagomeTokenizer, err := cache.TokenizerNamed(TokenizerName) - if err != nil { - return nil, err - } - normalizeFilter := unicode_normalize.MustNewUnicodeNormalizeFilter(unicode_normalize.NFKD) - rv := analysis.Analyzer{ - Tokenizer: kagomeTokenizer, - TokenFilters: []analysis.TokenFilter{ - normalizeFilter, - }, - } - return &rv, nil +func TokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) { + return character.NewCharacterTokenizer(unicode.IsLetter), nil } func init() { - registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) + registry.RegisterTokenizer(Name, TokenizerConstructor) } diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/config/config.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/config/config.go index a968841ac29e1b81d683fa5779333688e44d3526..f7bf4ce7f18d9fb7836efc3ec3e61bd3a3bab03e 100644 --- 
a/Godeps/_workspace/src/github.com/blevesearch/bleve/config/config.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/config/config.go @@ -88,6 +88,7 @@ import ( _ "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/boltdb" _ "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/goleveldb" _ "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/gtreap" + _ "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/moss" // index types _ "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/firestorm" diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/config/config_kagome.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/config/config_kagome.go index 017db2e3ff864daef190d5376666e746a200f754..c41611227f0673adad07bb925c606cb2863a4016 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/config/config_kagome.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/config/config_kagome.go @@ -12,5 +12,5 @@ package config import ( - _ "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/analysis/language/ja" + _ "github.com/blevesearch/blevex/lang/ja" ) diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/document/document.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/document/document.go index 90dd646363d77ee6777000cd8c0df06c3f2561ba..a59651351d1e7fa39c0156762881f17b9ff5902c 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/document/document.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/document/document.go @@ -55,3 +55,18 @@ func (d *Document) GoString() string { } return fmt.Sprintf("&document.Document{ID:%s, Fields: %s, CompositeFields: %s}", d.ID, fields, compositeFields) } + +func (d *Document) NumPlainTextBytes() uint64 { + rv := uint64(0) + for _, field := range d.Fields 
{ + rv += field.NumPlainTextBytes() + } + for _, compositeField := range d.CompositeFields { + for _, field := range d.Fields { + if compositeField.includesField(field.Name()) { + rv += field.NumPlainTextBytes() + } + } + } + return rv +} diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/document/field.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/document/field.go index ad49303d4a910f8894ff0ac4fc606a0033486bc0..257ff5df21d9f05987f747b65a539abaf343b1a7 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/document/field.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/document/field.go @@ -26,4 +26,9 @@ type Field interface { Options() IndexingOptions Analyze() (int, analysis.TokenFrequencies) Value() []byte + + // NumPlainTextBytes should return the number of plain text bytes + // that this field represents - this is a common metric for tracking + // the rate of indexing + NumPlainTextBytes() uint64 } diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/document/field_boolean.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/document/field_boolean.go index 29c0735c6b84dc1636696d44e5f4a96eacd1aac9..3bb6e57813ec024406b669b5cdd79f4efff38663 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/document/field_boolean.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/document/field_boolean.go @@ -18,10 +18,11 @@ import ( const DefaultBooleanIndexingOptions = StoreField | IndexField type BooleanField struct { - name string - arrayPositions []uint64 - options IndexingOptions - value []byte + name string + arrayPositions []uint64 + options IndexingOptions + value []byte + numPlainTextBytes uint64 } func (b *BooleanField) Name() string { @@ -66,12 +67,17 @@ func (b *BooleanField) GoString() string { return fmt.Sprintf("&document.BooleanField{Name:%s, Options: %s, Value: %s}", b.name, b.options, b.value) } +func (b *BooleanField) NumPlainTextBytes() uint64 { + return b.numPlainTextBytes +} + 
func NewBooleanFieldFromBytes(name string, arrayPositions []uint64, value []byte) *BooleanField { return &BooleanField{ - name: name, - arrayPositions: arrayPositions, - value: value, - options: DefaultNumericIndexingOptions, + name: name, + arrayPositions: arrayPositions, + value: value, + options: DefaultNumericIndexingOptions, + numPlainTextBytes: uint64(len(value)), } } @@ -80,14 +86,17 @@ func NewBooleanField(name string, arrayPositions []uint64, b bool) *BooleanField } func NewBooleanFieldWithIndexingOptions(name string, arrayPositions []uint64, b bool, options IndexingOptions) *BooleanField { + numPlainTextBytes := 5 v := []byte("F") if b { + numPlainTextBytes = 4 v = []byte("T") } return &BooleanField{ - name: name, - arrayPositions: arrayPositions, - value: v, - options: options, + name: name, + arrayPositions: arrayPositions, + value: v, + options: options, + numPlainTextBytes: uint64(numPlainTextBytes), } } diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/document/field_composite.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/document/field_composite.go index fb4b49a48bd638386d456c4a8c7f63191b609020..efe4b6df9cf68d5dfb529ae3a2954e29a71fb6d6 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/document/field_composite.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/document/field_composite.go @@ -69,7 +69,11 @@ func (c *CompositeField) Value() []byte { return []byte{} } -func (c *CompositeField) Compose(field string, length int, freq analysis.TokenFrequencies) { +func (c *CompositeField) NumPlainTextBytes() uint64 { + return 0 +} + +func (c *CompositeField) includesField(field string) bool { shouldInclude := c.defaultInclude _, fieldShouldBeIncluded := c.includedFields[field] if fieldShouldBeIncluded { @@ -79,8 +83,11 @@ func (c *CompositeField) Compose(field string, length int, freq analysis.TokenFr if fieldShouldBeExcluded { shouldInclude = false } + return shouldInclude +} - if shouldInclude { +func (c 
*CompositeField) Compose(field string, length int, freq analysis.TokenFrequencies) { + if c.includesField(field) { c.totalLength += length c.compositeFrequencies.MergeAll(field, freq) } diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/document/field_datetime.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/document/field_datetime.go index 1872162c4a9ec6ee78a4b0c145b1fbd161c0a0d9..3c938f3034f66966afc5522bff616dbbdd7abd85 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/document/field_datetime.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/document/field_datetime.go @@ -25,10 +25,11 @@ var MinTimeRepresentable = time.Unix(0, math.MinInt64) var MaxTimeRepresentable = time.Unix(0, math.MaxInt64) type DateTimeField struct { - name string - arrayPositions []uint64 - options IndexingOptions - value numeric_util.PrefixCoded + name string + arrayPositions []uint64 + options IndexingOptions + value numeric_util.PrefixCoded + numPlainTextBytes uint64 } func (n *DateTimeField) Name() string { @@ -95,12 +96,17 @@ func (n *DateTimeField) GoString() string { return fmt.Sprintf("&document.DateField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value) } +func (n *DateTimeField) NumPlainTextBytes() uint64 { + return n.numPlainTextBytes +} + func NewDateTimeFieldFromBytes(name string, arrayPositions []uint64, value []byte) *DateTimeField { return &DateTimeField{ - name: name, - arrayPositions: arrayPositions, - value: value, - options: DefaultDateTimeIndexingOptions, + name: name, + arrayPositions: arrayPositions, + value: value, + options: DefaultDateTimeIndexingOptions, + numPlainTextBytes: uint64(len(value)), } } @@ -117,6 +123,9 @@ func NewDateTimeFieldWithIndexingOptions(name string, arrayPositions []uint64, d arrayPositions: arrayPositions, value: prefixCoded, options: options, + // not correct, just a place holder until we revisit how fields are + // represented and can fix this better + numPlainTextBytes: uint64(8), 
}, nil } return nil, fmt.Errorf("cannot represent %s in this type", dt) diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/document/field_numeric.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/document/field_numeric.go index 94fdff94baa3d69e3598a063d903c2ccedade1ad..deefab23e21756bfbf65147451f8a034ce531ac7 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/document/field_numeric.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/document/field_numeric.go @@ -21,10 +21,11 @@ const DefaultNumericIndexingOptions = StoreField | IndexField const DefaultPrecisionStep uint = 4 type NumericField struct { - name string - arrayPositions []uint64 - options IndexingOptions - value numeric_util.PrefixCoded + name string + arrayPositions []uint64 + options IndexingOptions + value numeric_util.PrefixCoded + numPlainTextBytes uint64 } func (n *NumericField) Name() string { @@ -91,12 +92,17 @@ func (n *NumericField) GoString() string { return fmt.Sprintf("&document.NumericField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value) } +func (n *NumericField) NumPlainTextBytes() uint64 { + return n.numPlainTextBytes +} + func NewNumericFieldFromBytes(name string, arrayPositions []uint64, value []byte) *NumericField { return &NumericField{ - name: name, - arrayPositions: arrayPositions, - value: value, - options: DefaultNumericIndexingOptions, + name: name, + arrayPositions: arrayPositions, + value: value, + options: DefaultNumericIndexingOptions, + numPlainTextBytes: uint64(len(value)), } } @@ -112,5 +118,8 @@ func NewNumericFieldWithIndexingOptions(name string, arrayPositions []uint64, nu arrayPositions: arrayPositions, value: prefixCoded, options: options, + // not correct, just a place holder until we revisit how fields are + // represented and can fix this better + numPlainTextBytes: uint64(8), } } diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/document/field_text.go 
b/Godeps/_workspace/src/github.com/blevesearch/bleve/document/field_text.go index 202c1e7acb160e38d8ea8124b8b163780f8a566c..4703c632b1329148920ba983b81d55113edba8cd 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/document/field_text.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/document/field_text.go @@ -18,11 +18,12 @@ import ( const DefaultTextIndexingOptions = IndexField type TextField struct { - name string - arrayPositions []uint64 - options IndexingOptions - analyzer *analysis.Analyzer - value []byte + name string + arrayPositions []uint64 + options IndexingOptions + analyzer *analysis.Analyzer + value []byte + numPlainTextBytes uint64 } func (t *TextField) Name() string { @@ -72,35 +73,42 @@ func (t *TextField) GoString() string { return fmt.Sprintf("&document.TextField{Name:%s, Options: %s, Analyzer: %v, Value: %s, ArrayPositions: %v}", t.name, t.options, t.analyzer, t.value, t.arrayPositions) } +func (t *TextField) NumPlainTextBytes() uint64 { + return t.numPlainTextBytes +} + func NewTextField(name string, arrayPositions []uint64, value []byte) *TextField { return NewTextFieldWithIndexingOptions(name, arrayPositions, value, DefaultTextIndexingOptions) } func NewTextFieldWithIndexingOptions(name string, arrayPositions []uint64, value []byte, options IndexingOptions) *TextField { return &TextField{ - name: name, - arrayPositions: arrayPositions, - options: options, - value: value, + name: name, + arrayPositions: arrayPositions, + options: options, + value: value, + numPlainTextBytes: uint64(len(value)), } } func NewTextFieldWithAnalyzer(name string, arrayPositions []uint64, value []byte, analyzer *analysis.Analyzer) *TextField { return &TextField{ - name: name, - arrayPositions: arrayPositions, - options: DefaultTextIndexingOptions, - analyzer: analyzer, - value: value, + name: name, + arrayPositions: arrayPositions, + options: DefaultTextIndexingOptions, + analyzer: analyzer, + value: value, + numPlainTextBytes: 
uint64(len(value)), } } func NewTextFieldCustom(name string, arrayPositions []uint64, value []byte, options IndexingOptions, analyzer *analysis.Analyzer) *TextField { return &TextField{ - name: name, - arrayPositions: arrayPositions, - options: options, - analyzer: analyzer, - value: value, + name: name, + arrayPositions: arrayPositions, + options: options, + analyzer: analyzer, + value: value, + numPlainTextBytes: uint64(len(value)), } } diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/index.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index.go index 5cc9e0a2d772e2211b00070ff4d9d082dce2dfb8..499a040addf39e86c67741f2898aaf54427e938a 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/index.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/index.go @@ -13,6 +13,7 @@ import ( "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/document" "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index" "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store" + "golang.org/x/net/context" ) // A Batch groups together multiple Index and Delete @@ -167,6 +168,7 @@ type Index interface { DocCount() (uint64, error) Search(req *SearchRequest) (*SearchResult, error) + SearchInContext(ctx context.Context, req *SearchRequest) (*SearchResult, error) Fields() ([]string, error) @@ -197,6 +199,7 @@ type Index interface { Mapping() *IndexMapping Stats() *IndexStat + StatsMap() map[string]interface{} GetInternal(key []byte) ([]byte, error) SetInternal(key, val []byte) error diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/firestorm/dict_updater.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/firestorm/dict_updater.go index 053ba0d474d6711035e6ef991daa51a503745b77..c30fce26e55d54449d434e8174fa160643ed9486 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/firestorm/dict_updater.go +++ 
b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/firestorm/dict_updater.go @@ -22,6 +22,9 @@ const DefaultDictUpdateThreshold = 10 var DefaultDictUpdateSleep = 1 * time.Second type DictUpdater struct { + batchesStarted uint64 + batchesFlushed uint64 + f *Firestorm dictUpdateSleep time.Duration quit chan struct{} @@ -30,9 +33,6 @@ type DictUpdater struct { mutex sync.RWMutex workingSet map[string]int64 closeWait sync.WaitGroup - - batchesStarted uint64 - batchesFlushed uint64 } func NewDictUpdater(f *Firestorm) *DictUpdater { diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/firestorm/firestorm.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/firestorm/firestorm.go index ffdddaac58aef7cb0fdfa7916e57084505c71240..912502e1b0fdaf8bb69b900b9346a2dd0d5f290a 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/firestorm/firestorm.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/firestorm/firestorm.go @@ -27,14 +27,15 @@ const Name = "firestorm" var UnsafeBatchUseDetected = fmt.Errorf("bleve.Batch is NOT thread-safe, modification after execution detected") type Firestorm struct { + highDocNumber uint64 + docCount uint64 + storeName string storeConfig map[string]interface{} store store.KVStore compensator *Compensator analysisQueue *index.AnalysisQueue fieldCache *index.FieldCache - highDocNumber uint64 - docCount *uint64 garbageCollector *GarbageCollector lookuper *Lookuper dictUpdater *DictUpdater @@ -42,14 +43,13 @@ type Firestorm struct { } func NewFirestorm(storeName string, storeConfig map[string]interface{}, analysisQueue *index.AnalysisQueue) (index.Index, error) { - initialCount := uint64(0) rv := Firestorm{ storeName: storeName, storeConfig: storeConfig, compensator: NewCompensator(), analysisQueue: analysisQueue, fieldCache: index.NewFieldCache(), - docCount: &initialCount, + docCount: 0, highDocNumber: 0, stats: &indexStat{}, } @@ -75,10 +75,6 @@ func (f *Firestorm) Open() (err error) { 
return } - if ss, ok := f.store.(store.KVStoreStats); ok { - f.stats.kvStats = ss.Stats() - } - // start a reader var kvreader store.KVReader kvreader, err = f.store.Reader() @@ -134,7 +130,7 @@ func (f *Firestorm) Close() error { } func (f *Firestorm) DocCount() (uint64, error) { - count := atomic.LoadUint64(f.docCount) + count := atomic.LoadUint64(&f.docCount) return count, nil } @@ -146,6 +142,7 @@ func (f *Firestorm) Update(doc *document.Document) (err error) { // do analysis before acquiring write lock analysisStart := time.Now() + numPlainTextBytes := doc.NumPlainTextBytes() resultChan := make(chan *index.AnalysisResult) aw := index.NewAnalysisWork(f, doc, resultChan) @@ -183,6 +180,7 @@ func (f *Firestorm) Update(doc *document.Document) (err error) { f.dictUpdater.NotifyBatch(dictionaryDeltas) atomic.AddUint64(&f.stats.indexTime, uint64(time.Since(indexStart))) + atomic.AddUint64(&f.stats.numPlainTextBytesIndexed, numPlainTextBytes) return } @@ -302,11 +300,13 @@ func (f *Firestorm) Batch(batch *index.Batch) (err error) { var docsUpdated uint64 var docsDeleted uint64 + var numPlainTextBytes uint64 for _, doc := range batch.IndexOps { if doc != nil { doc.Number = firstDocNumber // actually assign doc numbers here firstDocNumber++ docsUpdated++ + numPlainTextBytes += doc.NumPlainTextBytes() } else { docsDeleted++ } @@ -411,6 +411,7 @@ func (f *Firestorm) Batch(batch *index.Batch) (err error) { atomic.AddUint64(&f.stats.updates, docsUpdated) atomic.AddUint64(&f.stats.deletes, docsDeleted) atomic.AddUint64(&f.stats.batches, 1) + atomic.AddUint64(&f.stats.numPlainTextBytesIndexed, numPlainTextBytes) } else { atomic.AddUint64(&f.stats.errors, 1) } @@ -543,7 +544,10 @@ func (f *Firestorm) Reader() (index.IndexReader, error) { func (f *Firestorm) Stats() json.Marshaler { return f.stats +} +func (f *Firestorm) StatsMap() map[string]interface{} { + return f.stats.statsMap() } func (f *Firestorm) Wait(timeout time.Duration) error { diff --git 
a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/firestorm/lookup.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/firestorm/lookup.go index d58640e6ad1f7a799015d2a1c96c0c1d38cbec1c..fee6c8289b797276fd01f8e7c335be2e0167096d 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/firestorm/lookup.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/firestorm/lookup.go @@ -19,13 +19,13 @@ import ( const channelBufferSize = 1000 type Lookuper struct { + tasksQueued uint64 + tasksDone uint64 + f *Firestorm workChan chan []*InFlightItem quit chan struct{} closeWait sync.WaitGroup - - tasksQueued uint64 - tasksDone uint64 } func NewLookuper(f *Firestorm) *Lookuper { @@ -117,10 +117,10 @@ func (l *Lookuper) lookup(item *InFlightItem) { l.f.compensator.Migrate(item.docID, item.docNum, oldDocNums) if len(oldDocNums) == 0 && item.docNum != 0 { // this was an add, not an update - atomic.AddUint64(l.f.docCount, 1) + atomic.AddUint64(&l.f.docCount, 1) } else if len(oldDocNums) > 0 && item.docNum == 0 { // this was a delete (and it previously existed) - atomic.AddUint64(l.f.docCount, ^uint64(0)) + atomic.AddUint64(&l.f.docCount, ^uint64(0)) } } diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/firestorm/reader_terms.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/firestorm/reader_terms.go index a80607ba3c58bb4a8dafa8bca8c62de78cdf4f6c..008aa6881ab92272a17d782f20a03bccf87d0722 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/firestorm/reader_terms.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/firestorm/reader_terms.go @@ -11,6 +11,7 @@ package firestorm import ( "bytes" + "sync/atomic" "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index" "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store" @@ -55,6 +56,7 @@ func newFirestormTermFieldReader(r *firestormReader, field uint16, term 
[]byte) rv.count = dictionaryRow.Count() } + atomic.AddUint64(&r.f.stats.termSearchersStarted, uint64(1)) return &rv, nil } @@ -135,6 +137,7 @@ func (r *firestormTermFieldReader) Count() uint64 { } func (r *firestormTermFieldReader) Close() error { + atomic.AddUint64(&r.r.f.stats.termSearchersFinished, uint64(1)) if r.i != nil { return r.i.Close() } diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/firestorm/stats.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/firestorm/stats.go index ceac56114aae0e5c593b11262b72991422fa3a6c..ce87fa339a0d313860e091bfaad7adefa3350b8e 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/firestorm/stats.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/firestorm/stats.go @@ -12,16 +12,20 @@ package firestorm import ( "encoding/json" "sync/atomic" + + "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store" ) type indexStat struct { - f *Firestorm updates, deletes, batches, errors uint64 analysisTime, indexTime uint64 - kvStats json.Marshaler + termSearchersStarted uint64 + termSearchersFinished uint64 + numPlainTextBytesIndexed uint64 + f *Firestorm } -func (i *indexStat) MarshalJSON() ([]byte, error) { +func (i *indexStat) statsMap() map[string]interface{} { m := map[string]interface{}{} m["updates"] = atomic.LoadUint64(&i.updates) m["deletes"] = atomic.LoadUint64(&i.deletes) @@ -30,8 +34,18 @@ func (i *indexStat) MarshalJSON() ([]byte, error) { m["analysis_time"] = atomic.LoadUint64(&i.analysisTime) m["index_time"] = atomic.LoadUint64(&i.indexTime) m["lookup_queue_len"] = len(i.f.lookuper.workChan) - if i.kvStats != nil { - m["kv"] = i.kvStats + m["term_searchers_started"] = atomic.LoadUint64(&i.termSearchersStarted) + m["term_searchers_finished"] = atomic.LoadUint64(&i.termSearchersFinished) + m["num_plain_text_bytes_indexed"] = atomic.LoadUint64(&i.numPlainTextBytesIndexed) + + if o, ok := i.f.store.(store.KVStoreStats); ok { + 
m["kv"] = o.StatsMap() } + + return m +} + +func (i *indexStat) MarshalJSON() ([]byte, error) { + m := i.statsMap() return json.Marshal(m) } diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/firestorm/warmup.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/firestorm/warmup.go index feb1e22a6ee124304cca3ed840fa739718b9a0dc..ea98effbe76b6cd0851901efbd7cfa6869e8c7ae 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/firestorm/warmup.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/firestorm/warmup.go @@ -90,7 +90,7 @@ func (f *Firestorm) warmup(reader store.KVReader) error { lastDocNumbers = append(lastDocNumbers, docNum) } else { // new doc id - atomic.AddUint64(f.docCount, 1) + atomic.AddUint64(&f.docCount, 1) // last docID had multiple doc numbers if len(lastDocNumbers) > 1 { diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/index.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/index.go index 294df0b3b02c583633c196e13d2abe4bc9464e06..1935b09773259de4f7e118e70610a713d6f0e5dd 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/index.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/index.go @@ -42,6 +42,7 @@ type Index interface { Reader() (IndexReader, error) Stats() json.Marshaler + StatsMap() map[string]interface{} Analyze(d *document.Document) *AnalysisResult diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/kvstore.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/kvstore.go index 7222af4fb4810aa6e61fd6089e1e21a04fd847c4..4d957789c51b9276990533ca0f9a65067d5084e9 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/kvstore.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/kvstore.go @@ -162,4 +162,6 @@ type KVBatch interface { type KVStoreStats interface { // Stats returns a JSON serializable object representing stats for this KVStore 
Stats() json.Marshaler + + StatsMap() map[string]interface{} } diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/merge.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/merge.go index 1443dd3e82a599daa8fcd99554c35cfee26228fb..6e1a7da7f091be111e093cdc02a26cb61fa7a5bf 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/merge.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/merge.go @@ -9,8 +9,6 @@ package store -import "unsafe" - // At the moment this happens to be the same interface as described by // RocksDB, but this may not always be the case. @@ -30,34 +28,6 @@ type MergeOperator interface { Name() string } -// NativeMergeOperator is a merge operator that also includeTermVectors -// a C implementation -type NativeMergeOperator interface { - MergeOperator - - // a pointer to function in C with the signature - // char* (*full_merge)(void *state, - // const char* key, size_t key_length, - // const char* existing_value, - // size_t existing_value_length, - // const char* const* operands_list, - // const size_t* operands_list_length, int num_operands, - // unsigned char* success, size_t* new_value_length) - FullMergeC() unsafe.Pointer - - // a pointer to function in C with the signature - // char* (*partial_merge)(void *state, - // const char* key, size_t key_length, - // const char* const* operands_list, - // const size_t* operands_list_length, int num_operands, - // unsigned char* success, size_t* new_value_length) - PartialMergeC() unsafe.Pointer - - // a pointer to function in C with signature - // const char* merge_operator_name_fn(void *state) - NameC() unsafe.Pointer -} - type EmulatedMerge struct { Merges map[string][][]byte mo MergeOperator diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/metrics/stats.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/metrics/stats.go new file mode 100644 index 
0000000000000000000000000000000000000000..bf8b3370c0ab40f45f3b07e83a548ffa5b976eb4 --- /dev/null +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/metrics/stats.go @@ -0,0 +1,46 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. + +package metrics + +import ( + "encoding/json" + + "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store" +) + +type stats struct { + s *Store +} + +func (s *stats) statsMap() map[string]interface{} { + ms := map[string]interface{}{} + + ms["metrics"] = map[string]interface{}{ + "reader_get": TimerMap(s.s.TimerReaderGet), + "reader_multi_get": TimerMap(s.s.TimerReaderMultiGet), + "reader_prefix_iterator": TimerMap(s.s.TimerReaderPrefixIterator), + "reader_range_iterator": TimerMap(s.s.TimerReaderRangeIterator), + "writer_execute_batch": TimerMap(s.s.TimerWriterExecuteBatch), + "iterator_seek": TimerMap(s.s.TimerIteratorSeek), + "iterator_next": TimerMap(s.s.TimerIteratorNext), + "batch_merge": TimerMap(s.s.TimerBatchMerge), + } + + if o, ok := s.s.o.(store.KVStoreStats); ok { + ms["kv"] = o.StatsMap() + } + + return ms +} + +func (s *stats) MarshalJSON() ([]byte, error) { + m := s.statsMap() + return json.Marshal(m) +} diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/metrics/store.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/metrics/store.go index 
6df5d2106d1a9235db4c4135defc5f02ba44e6f7..8bbf0de332bdba4bb7351024eb81cf3941830b20 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/metrics/store.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/metrics/store.go @@ -43,6 +43,8 @@ type Store struct { m sync.Mutex // Protects the fields that follow. errors *list.List // Capped list of StoreError's. + + s *stats } func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) { @@ -68,7 +70,7 @@ func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, return nil, err } - return &Store{ + rv := &Store{ o: kvs, TimerReaderGet: metrics.NewTimer(), @@ -81,7 +83,11 @@ func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, TimerBatchMerge: metrics.NewTimer(), errors: list.New(), - }, nil + } + + rv.s = &stats{s: rv} + + return rv, nil } func init() { @@ -254,3 +260,11 @@ func (s *Store) WriteCSV(w io.Writer) { WriteTimerCSV(w, s.TimerIteratorNext) WriteTimerCSV(w, s.TimerBatchMerge) } + +func (s *Store) Stats() json.Marshaler { + return s.s +} + +func (s *Store) StatsMap() map[string]interface{} { + return s.s.statsMap() +} diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/metrics/util.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/metrics/util.go index 053e38ee8a520970248a8b69e92bed25e32381e4..0f94ba7f460616f330f593d23cf201b226cd94ef 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/metrics/util.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/metrics/util.go @@ -12,6 +12,36 @@ import ( var timerPercentiles = []float64{0.5, 0.75, 0.95, 0.99, 0.999} +func TimerMap(timer metrics.Timer) map[string]interface{} { + + rv := make(map[string]interface{}) + t := timer.Snapshot() + p := t.Percentiles(timerPercentiles) + + percentiles := make(map[string]interface{}) + percentiles["median"] = p[0] + percentiles["75%"] = 
p[1] + percentiles["95%"] = p[2] + percentiles["99%"] = p[3] + percentiles["99.9%"] = p[4] + + rates := make(map[string]interface{}) + rates["1-min"] = t.Rate1() + rates["5-min"] = t.Rate5() + rates["15-min"] = t.Rate15() + rates["mean"] = t.RateMean() + + rv["count"] = t.Count() + rv["min"] = t.Min() + rv["max"] = t.Max() + rv["mean"] = t.Mean() + rv["stddev"] = t.StdDev() + rv["percentiles"] = percentiles + rv["rates"] = rates + + return rv +} + func WriteTimerJSON(w io.Writer, timer metrics.Timer) { t := timer.Snapshot() p := t.Percentiles(timerPercentiles) diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/moss/batch.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/moss/batch.go new file mode 100644 index 0000000000000000000000000000000000000000..e18c41892b7ad91cfbc31016376355fb7c21dacf --- /dev/null +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/moss/batch.go @@ -0,0 +1,84 @@ +// Copyright (c) 2016 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the +// License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an "AS +// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language +// governing permissions and limitations under the License. + +package moss + +import ( + "github.com/couchbase/moss" + + "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store" +) + +type Batch struct { + store *Store + merge *store.EmulatedMerge + batch moss.Batch + buf []byte // Non-nil when using pre-alloc'ed / NewBatchEx(). 
+ bufUsed int +} + +func (b *Batch) Set(key, val []byte) { + var err error + if b.buf != nil { + b.bufUsed += len(key) + len(val) + err = b.batch.AllocSet(key, val) + } else { + err = b.batch.Set(key, val) + } + + if err != nil { + b.store.Logf("bleve moss batch.Set err: %v", err) + } +} + +func (b *Batch) Delete(key []byte) { + var err error + if b.buf != nil { + b.bufUsed += len(key) + err = b.batch.AllocDel(key) + } else { + err = b.batch.Del(key) + } + + if err != nil { + b.store.Logf("bleve moss batch.Delete err: %v", err) + } +} + +func (b *Batch) Merge(key, val []byte) { + if b.buf != nil { + b.bufUsed += len(key) + len(val) + } + b.merge.Merge(key, val) +} + +func (b *Batch) Reset() { + err := b.Close() + if err != nil { + b.store.Logf("bleve moss batch.Close err: %v", err) + return + } + + batch, err := b.store.ms.NewBatch(0, 0) + if err == nil { + b.batch = batch + b.merge = store.NewEmulatedMerge(b.store.mo) + b.buf = nil + b.bufUsed = 0 + } +} + +func (b *Batch) Close() error { + b.merge = nil + err := b.batch.Close() + b.batch = nil + return err +} diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/moss/iterator.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/moss/iterator.go new file mode 100644 index 0000000000000000000000000000000000000000..cf616fe465143bd84b258069a3ee12da43a3c145 --- /dev/null +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/moss/iterator.go @@ -0,0 +1,134 @@ +// Copyright (c) 2016 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the +// License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an "AS +// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. 
See the License for the specific language +// governing permissions and limitations under the License. + +package moss + +import ( + "bytes" + + "github.com/couchbase/moss" +) + +type Iterator struct { + store *Store + ss moss.Snapshot + iter moss.Iterator + prefix []byte + start []byte + end []byte + done bool + k []byte + v []byte +} + +func (x *Iterator) Seek(seekToKey []byte) { + x.done = true + x.k = nil + x.v = nil + + if bytes.Compare(seekToKey, x.start) < 0 { + seekToKey = x.start + } + + iter, err := x.ss.StartIterator(seekToKey, x.end, moss.IteratorOptions{}) + if err != nil { + x.store.Logf("bleve moss StartIterator err: %v", err) + return + } + + err = x.iter.Close() + if err != nil { + x.store.Logf("bleve moss iterator.Seek err: %v", err) + return + } + + x.iter = iter + + x.checkDone() +} + +func (x *Iterator) Next() { + if x.done { + return + } + + x.done = true + x.k = nil + x.v = nil + + err := x.iter.Next() + if err != nil { + return + } + + x.checkDone() +} + +func (x *Iterator) Current() ([]byte, []byte, bool) { + return x.k, x.v, !x.done +} + +func (x *Iterator) Key() []byte { + if x.done { + return nil + } + + return x.k +} + +func (x *Iterator) Value() []byte { + if x.done { + return nil + } + + return x.v +} + +func (x *Iterator) Valid() bool { + return !x.done +} + +func (x *Iterator) Close() error { + var err error + + x.ss = nil + + if x.iter != nil { + err = x.iter.Close() + x.iter = nil + } + + x.prefix = nil + x.done = true + x.k = nil + x.v = nil + + return err +} + +func (x *Iterator) checkDone() { + x.done = true + x.k = nil + x.v = nil + + k, v, err := x.iter.Current() + if err != nil { + return + } + + if x.prefix != nil && !bytes.HasPrefix(k, x.prefix) { + return + } + + x.done = false + x.k = k + x.v = v +} diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/moss/lower.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/moss/lower.go new file mode 100644 index 
0000000000000000000000000000000000000000..2517bbf8290b0e92a0fedae35ddc8eba4712fbd4 --- /dev/null +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/moss/lower.go @@ -0,0 +1,404 @@ +// Copyright (c) 2016 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the +// License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an "AS +// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language +// governing permissions and limitations under the License. + +// Package moss provides a KVStore implementation based on the +// github.com/couchbaselabs/moss library. + +package moss + +import ( + "fmt" + "sync" + + "github.com/couchbase/moss" + + "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store" + "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/registry" +) + +func initLowerLevelStore( + mo store.MergeOperator, + config map[string]interface{}, + lowerLevelStoreName string, + lowerLevelStoreConfig map[string]interface{}, + lowerLevelMaxBatchSize uint64, + logf func(format string, a ...interface{}), +) (moss.Snapshot, moss.LowerLevelUpdate, store.KVStore, error) { + constructor := registry.KVStoreConstructorByName(lowerLevelStoreName) + if constructor == nil { + return nil, nil, nil, fmt.Errorf("moss store, initLowerLevelStore,"+ + " could not find lower level store: %s", lowerLevelStoreName) + } + + if lowerLevelStoreConfig == nil { + lowerLevelStoreConfig = map[string]interface{}{} + } + + for k, v := range config { + _, exists := lowerLevelStoreConfig[k] + if !exists { + lowerLevelStoreConfig[k] = v + } + } + + kvStore, err := constructor(mo, lowerLevelStoreConfig) + if 
err != nil { + return nil, nil, nil, err + } + + llStore := &llStore{ + refs: 0, + config: config, + llConfig: lowerLevelStoreConfig, + kvStore: kvStore, + logf: logf, + } + + llUpdate := func(ssHigher moss.Snapshot) (ssLower moss.Snapshot, err error) { + return llStore.update(ssHigher, lowerLevelMaxBatchSize) + } + + llSnapshot, err := llUpdate(nil) + if err != nil { + _ = kvStore.Close() + return nil, nil, nil, err + } + + return llSnapshot, llUpdate, kvStore, nil // llStore.refs is now 1. +} + +// ------------------------------------------------ + +// llStore is a lower level store and provides ref-counting around a +// bleve store.KVStore. +type llStore struct { + kvStore store.KVStore + + config map[string]interface{} + llConfig map[string]interface{} + + logf func(format string, a ...interface{}) + + m sync.Mutex // Protects fields that follow. + refs int +} + +// llSnapshot represents a lower-level snapshot, wrapping a bleve +// store.KVReader, and implements the moss.Snapshot interface. +type llSnapshot struct { + llStore *llStore // Holds 1 refs on the llStore. + kvReader store.KVReader + + m sync.Mutex // Protects fields that follow. + refs int +} + +// llIterator represents a lower-level iterator, wrapping a bleve +// store.KVIterator, and implements the moss.Iterator interface. +type llIterator struct { + llSnapshot *llSnapshot // Holds 1 refs on the llSnapshot. + + // Some lower-level KVReader implementations need a separate + // KVReader clone, due to KVReader single-threaded'ness. 
+ kvReader store.KVReader + + kvIterator store.KVIterator +} + +type readerSource interface { + Reader() (store.KVReader, error) +} + +// ------------------------------------------------ + +func (s *llStore) addRef() *llStore { + s.m.Lock() + s.refs += 1 + s.m.Unlock() + + return s +} + +func (s *llStore) decRef() { + s.m.Lock() + s.refs -= 1 + if s.refs <= 0 { + err := s.kvStore.Close() + if err != nil { + s.logf("llStore kvStore.Close err: %v", err) + } + } + s.m.Unlock() +} + +// update() mutates this lower level store with latest data from the +// given higher level moss.Snapshot and returns a new moss.Snapshot +// that the higher level can use which represents this lower level +// store. +func (s *llStore) update(ssHigher moss.Snapshot, maxBatchSize uint64) ( + ssLower moss.Snapshot, err error) { + if ssHigher != nil { + iter, err := ssHigher.StartIterator(nil, nil, moss.IteratorOptions{ + IncludeDeletions: true, + SkipLowerLevel: true, + }) + if err != nil { + return nil, err + } + + defer func() { + err = iter.Close() + if err != nil { + s.logf("llStore iter.Close err: %v", err) + } + }() + + kvWriter, err := s.kvStore.Writer() + if err != nil { + return nil, err + } + + defer func() { + err = kvWriter.Close() + if err != nil { + s.logf("llStore kvWriter.Close err: %v", err) + } + }() + + batch := kvWriter.NewBatch() + + defer func() { + if batch != nil { + err = batch.Close() + if err != nil { + s.logf("llStore batch.Close err: %v", err) + } + } + }() + + var readOptions moss.ReadOptions + + i := uint64(0) + for { + if i%1000000 == 0 { + s.logf("llStore.update, i: %d", i) + } + + ex, key, val, err := iter.CurrentEx() + if err == moss.ErrIteratorDone { + break + } + if err != nil { + return nil, err + } + + switch ex.Operation { + case moss.OperationSet: + batch.Set(key, val) + + case moss.OperationDel: + batch.Delete(key) + + case moss.OperationMerge: + val, err = ssHigher.Get(key, readOptions) + if err != nil { + return nil, err + } + + if val != nil { + 
batch.Set(key, val) + } else { + batch.Delete(key) + } + + default: + return nil, fmt.Errorf("moss store, update,"+ + " unexpected operation, ex: %v", ex) + } + + i++ + + err = iter.Next() + if err == moss.ErrIteratorDone { + break + } + if err != nil { + return nil, err + } + + if maxBatchSize > 0 && i%maxBatchSize == 0 { + err = kvWriter.ExecuteBatch(batch) + if err != nil { + return nil, err + } + + err = batch.Close() + if err != nil { + return nil, err + } + + batch = kvWriter.NewBatch() + } + } + + if i > 0 { + s.logf("llStore.update, ExecuteBatch,"+ + " path: %s, total: %d, start", s.llConfig["path"], i) + + err = kvWriter.ExecuteBatch(batch) + if err != nil { + return nil, err + } + + s.logf("llStore.update, ExecuteBatch,"+ + " path: %s: total: %d, done", s.llConfig["path"], i) + } + } + + kvReader, err := s.kvStore.Reader() + if err != nil { + return nil, err + } + + s.logf("llStore.update, new reader") + + return &llSnapshot{ + llStore: s.addRef(), + kvReader: kvReader, + refs: 1, + }, nil +} + +// ------------------------------------------------ + +func (llss *llSnapshot) addRef() *llSnapshot { + llss.m.Lock() + llss.refs += 1 + llss.m.Unlock() + + return llss +} + +func (llss *llSnapshot) decRef() { + llss.m.Lock() + llss.refs -= 1 + if llss.refs <= 0 { + if llss.kvReader != nil { + err := llss.kvReader.Close() + if err != nil { + llss.llStore.logf("llSnapshot kvReader.Close err: %v", err) + } + + llss.kvReader = nil + } + + if llss.llStore != nil { + llss.llStore.decRef() + llss.llStore = nil + } + } + llss.m.Unlock() +} + +func (llss *llSnapshot) Close() error { + llss.decRef() + + return nil +} + +func (llss *llSnapshot) Get(key []byte, + readOptions moss.ReadOptions) ([]byte, error) { + rs, ok := llss.kvReader.(readerSource) + if ok { + r2, err := rs.Reader() + if err != nil { + return nil, err + } + + val, err := r2.Get(key) + + _ = r2.Close() + + return val, err + } + + return llss.kvReader.Get(key) +} + +func (llss *llSnapshot) StartIterator( + 
startKeyInclusive, endKeyExclusive []byte, + iteratorOptions moss.IteratorOptions) (moss.Iterator, error) { + rs, ok := llss.kvReader.(readerSource) + if ok { + r2, err := rs.Reader() + if err != nil { + return nil, err + } + + i2 := r2.RangeIterator(startKeyInclusive, endKeyExclusive) + + return &llIterator{llSnapshot: llss.addRef(), kvReader: r2, kvIterator: i2}, nil + } + + i := llss.kvReader.RangeIterator(startKeyInclusive, endKeyExclusive) + + return &llIterator{llSnapshot: llss.addRef(), kvReader: nil, kvIterator: i}, nil +} + +// ------------------------------------------------ + +func (lli *llIterator) Close() error { + var err0 error + if lli.kvIterator != nil { + err0 = lli.kvIterator.Close() + lli.kvIterator = nil + } + + var err1 error + if lli.kvReader != nil { + err1 = lli.kvReader.Close() + lli.kvReader = nil + } + + lli.llSnapshot.decRef() + lli.llSnapshot = nil + + if err0 != nil { + return err0 + } + + if err1 != nil { + return err1 + } + + return nil +} + +func (lli *llIterator) Next() error { + lli.kvIterator.Next() + + return nil +} + +func (lli *llIterator) Current() (key, val []byte, err error) { + key, val, ok := lli.kvIterator.Current() + if !ok { + return nil, nil, moss.ErrIteratorDone + } + + return key, val, nil +} + +func (lli *llIterator) CurrentEx() ( + entryEx moss.EntryEx, key, val []byte, err error) { + return moss.EntryEx{}, nil, nil, moss.ErrUnimplemented + +} diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/moss/reader.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/moss/reader.go new file mode 100644 index 0000000000000000000000000000000000000000..13bc1a34494dddf107b55e83ef120f8fc0fe080d --- /dev/null +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/moss/reader.go @@ -0,0 +1,82 @@ +// Copyright (c) 2016 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the +// License. 
You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an "AS +// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language +// governing permissions and limitations under the License. + +package moss + +import ( + "github.com/couchbase/moss" + + "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store" +) + +type Reader struct { + store *Store + ss moss.Snapshot +} + +func (r *Reader) Get(k []byte) (v []byte, err error) { + v, err = r.ss.Get(k, moss.ReadOptions{}) + if err != nil { + return nil, err + } + if v != nil { + return append([]byte(nil), v...), nil + } + return nil, nil +} + +func (r *Reader) MultiGet(keys [][]byte) ([][]byte, error) { + return store.MultiGet(r, keys) +} + +func (r *Reader) PrefixIterator(k []byte) store.KVIterator { + iter, err := r.ss.StartIterator(k, nil, moss.IteratorOptions{}) + if err != nil { + return nil + } + + rv := &Iterator{ + store: r.store, + ss: r.ss, + iter: iter, + prefix: k, + start: k, + end: nil, + } + + rv.checkDone() + + return rv +} + +func (r *Reader) RangeIterator(start, end []byte) store.KVIterator { + iter, err := r.ss.StartIterator(start, end, moss.IteratorOptions{}) + if err != nil { + return nil + } + + rv := &Iterator{ + store: r.store, + ss: r.ss, + iter: iter, + prefix: nil, + start: start, + end: end, + } + + rv.checkDone() + + return rv +} + +func (r *Reader) Close() error { + return r.ss.Close() +} diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/moss/stats.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/moss/stats.go new file mode 100644 index 0000000000000000000000000000000000000000..7bc2a069fcda0a400063ff9984abbf53996c754c --- /dev/null +++ 
b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/moss/stats.go @@ -0,0 +1,43 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. + +package moss + +import ( + "encoding/json" + + "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store" +) + +type stats struct { + s *Store +} + +func (s *stats) statsMap() map[string]interface{} { + ms := map[string]interface{}{} + + var err error + ms["moss"], err = s.s.ms.Stats() + if err != nil { + return ms + } + + if s.s.llstore != nil { + if o, ok := s.s.llstore.(store.KVStoreStats); ok { + ms["kv"] = o.StatsMap() + } + } + + return ms +} + +func (s *stats) MarshalJSON() ([]byte, error) { + m := s.statsMap() + return json.Marshal(m) +} diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/moss/store.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/moss/store.go new file mode 100644 index 0000000000000000000000000000000000000000..450384332d1ed4c1da1ccad6885e4fbf7266b87c --- /dev/null +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/moss/store.go @@ -0,0 +1,196 @@ +// Copyright (c) 2016 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the +// License. 
You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an "AS +// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language +// governing permissions and limitations under the License. + +// Package moss provides a KVStore implementation based on the +// github.com/couchbaselabs/moss library. + +package moss + +import ( + "encoding/json" + "fmt" + "sync" + + "github.com/couchbase/moss" + + "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store" + "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/registry" +) + +// RegistryCollectionOptions should be treated as read-only after +// process init()'ialization. +var RegistryCollectionOptions = map[string]moss.CollectionOptions{} + +const Name = "moss" + +type Store struct { + m sync.Mutex + ms moss.Collection + mo store.MergeOperator + llstore store.KVStore + + s *stats +} + +// New initializes a moss storage with values from the optional +// config["mossCollectionOptions"] (a JSON moss.CollectionOptions). +// Next, values from the RegistryCollectionOptions, named by the +// optional config["mossCollectionOptionsName"], take precedence. +// Finally, base case defaults are taken from +// moss.DefaultCollectionOptions. +func New(mo store.MergeOperator, config map[string]interface{}) ( + store.KVStore, error) { + options := moss.DefaultCollectionOptions // Copy. + + v, ok := config["mossCollectionOptionsName"] + if ok { + name, ok := v.(string) + if !ok { + return nil, fmt.Errorf("moss store,"+ + " could not parse config[mossCollectionOptionsName]: %v", v) + } + + options, ok = RegistryCollectionOptions[name] // Copy. 
+ if !ok { + return nil, fmt.Errorf("moss store,"+ + " could not find RegistryCollectionOptions, name: %s", name) + } + } + + v, ok = config["mossCollectionOptions"] + if ok { + b, err := json.Marshal(v) // Convert from map[string]interface{}. + if err != nil { + return nil, fmt.Errorf("moss store,"+ + " could not marshal config[mossCollectionOptions]: %v", v) + } + + err = json.Unmarshal(b, &options) + if err != nil { + return nil, fmt.Errorf("moss store,"+ + " could not unmarshal config[mossCollectionOptions]: %v", v) + } + } + + // -------------------------------------------------- + + if options.Log == nil || options.Debug <= 0 { + options.Log = func(format string, a ...interface{}) {} + } + + // -------------------------------------------------- + + mossLowerLevelStoreName := "" + v, ok = config["mossLowerLevelStoreName"] + if ok { + mossLowerLevelStoreName, ok = v.(string) + if !ok { + return nil, fmt.Errorf("moss store,"+ + " could not parse config[mossLowerLevelStoreName]: %v", v) + } + } + + var llStore store.KVStore + if options.LowerLevelInit == nil && + options.LowerLevelUpdate == nil && + mossLowerLevelStoreName != "" { + mossLowerLevelStoreConfig := map[string]interface{}{} + v, ok := config["mossLowerLevelStoreConfig"] + if ok { + mossLowerLevelStoreConfig, ok = v.(map[string]interface{}) + if !ok { + return nil, fmt.Errorf("moss store, initLowerLevelStore,"+ + " could parse mossLowerLevelStoreConfig: %v", v) + } + } + + mossLowerLevelMaxBatchSize := uint64(0) + v, ok = config["mossLowerLevelMaxBatchSize"] + if ok { + mossLowerLevelMaxBatchSizeF, ok := v.(float64) + if !ok { + return nil, fmt.Errorf("moss store,"+ + " could not parse config[mossLowerLevelMaxBatchSize]: %v", v) + } + + mossLowerLevelMaxBatchSize = uint64(mossLowerLevelMaxBatchSizeF) + } + + lowerLevelInit, lowerLevelUpdate, lowerLevelStore, err := + initLowerLevelStore(mo, config, + mossLowerLevelStoreName, + mossLowerLevelStoreConfig, + mossLowerLevelMaxBatchSize, + options.Log) + if 
err != nil { + return nil, err + } + + options.LowerLevelInit = lowerLevelInit + options.LowerLevelUpdate = lowerLevelUpdate + llStore = lowerLevelStore + } + + // -------------------------------------------------- + + options.MergeOperator = mo + + ms, err := moss.NewCollection(options) + if err != nil { + return nil, err + } + err = ms.Start() + if err != nil { + return nil, err + } + rv := Store{ + ms: ms, + mo: mo, + llstore: llStore, + } + rv.s = &stats{s: &rv} + return &rv, nil +} + +func (s *Store) Close() error { + return s.ms.Close() +} + +func (s *Store) Reader() (store.KVReader, error) { + ss, err := s.ms.Snapshot() + if err != nil { + return nil, err + } + return &Reader{ss: ss}, nil +} + +func (s *Store) Writer() (store.KVWriter, error) { + return &Writer{s: s}, nil +} + +func (s *Store) Logf(fmt string, args ...interface{}) { + options := s.ms.Options() + if options.Log != nil { + options.Log(fmt, args...) + } +} + +func (s *Store) Stats() json.Marshaler { + return s.s +} + +func (s *Store) StatsMap() map[string]interface{} { + return s.s.statsMap() +} + +func init() { + registry.RegisterKVStore(Name, New) +} diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/moss/writer.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/moss/writer.go new file mode 100644 index 0000000000000000000000000000000000000000..0985f7baa985e270225823eb3e206b44fa64a8fb --- /dev/null +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store/moss/writer.go @@ -0,0 +1,94 @@ +// Copyright (c) 2016 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the +// License. 
You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an "AS +// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language +// governing permissions and limitations under the License. + +package moss + +import ( + "fmt" + + "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store" + + "github.com/couchbase/moss" +) + +type Writer struct { + s *Store +} + +func (w *Writer) NewBatch() store.KVBatch { + b, err := w.s.ms.NewBatch(0, 0) + if err != nil { + return nil + } + + return &Batch{ + store: w.s, + merge: store.NewEmulatedMerge(w.s.mo), + batch: b, + } +} + +func (w *Writer) NewBatchEx(options store.KVBatchOptions) ( + []byte, store.KVBatch, error) { + numOps := options.NumSets + options.NumDeletes + options.NumMerges + + b, err := w.s.ms.NewBatch(numOps, options.TotalBytes) + if err != nil { + return nil, nil, err + } + + buf, err := b.Alloc(options.TotalBytes) + if err != nil { + return nil, nil, err + } + + return buf, &Batch{ + store: w.s, + merge: store.NewEmulatedMerge(w.s.mo), + batch: b, + buf: buf, + bufUsed: 0, + }, nil +} + +func (w *Writer) ExecuteBatch(b store.KVBatch) (err error) { + batch, ok := b.(*Batch) + if !ok { + return fmt.Errorf("wrong type of batch") + } + + for kStr, mergeOps := range batch.merge.Merges { + for _, v := range mergeOps { + if batch.buf != nil { + kLen := len(kStr) + vLen := len(v) + kBuf := batch.buf[batch.bufUsed : batch.bufUsed+kLen] + vBuf := batch.buf[batch.bufUsed+kLen : batch.bufUsed+kLen+vLen] + copy(kBuf, kStr) + copy(vBuf, v) + batch.bufUsed += kLen + vLen + err = batch.batch.AllocMerge(kBuf, vBuf) + } else { + err = batch.batch.Merge([]byte(kStr), v) + } + if err != nil { + return err + } + } + } + + return w.s.ms.ExecuteBatch(batch.batch, 
moss.WriteOptions{}) +} + +func (w *Writer) Close() error { + w.s = nil + return nil +} diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/upside_down/reader.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/upside_down/reader.go index 473b2626143b6a53498399c7071c96e4448bb853..fa09f9e6c2d4e743c375bac781ed59c897ff6c52 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/upside_down/reader.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/upside_down/reader.go @@ -10,6 +10,8 @@ package upside_down import ( + "sync/atomic" + "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index" "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store" ) @@ -29,6 +31,7 @@ func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, fi return nil, err } if val == nil { + atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1)) return &UpsideDownCouchTermFieldReader{ count: 0, term: term, @@ -44,6 +47,7 @@ func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, fi tfr := NewTermFrequencyRow(term, field, []byte{}, 0, 0) it := indexReader.kvreader.PrefixIterator(tfr.Key()) + atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1)) return &UpsideDownCouchTermFieldReader{ indexReader: indexReader, iterator: it, @@ -163,5 +167,6 @@ func (r *UpsideDownCouchDocIDReader) Advance(docID string) (string, error) { } func (r *UpsideDownCouchDocIDReader) Close() error { + atomic.AddUint64(&r.indexReader.index.stats.termSearchersFinished, uint64(1)) return r.iterator.Close() } diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/upside_down/row.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/upside_down/row.go index 9a85cff66be0faa6ce1178915cfb297da36c0226..6dfecbc6b0e503c8eee022747df13dd1c06761a3 100644 --- 
a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/upside_down/row.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/upside_down/row.go @@ -234,6 +234,8 @@ func NewFieldRowKV(key, value []byte) (*FieldRow, error) { // DICTIONARY +const DictionaryRowMaxValueSize = binary.MaxVarintLen64 + type DictionaryRow struct { field uint16 term []byte @@ -264,7 +266,7 @@ func (dr *DictionaryRow) Value() []byte { } func (dr *DictionaryRow) ValueSize() int { - return binary.MaxVarintLen64 + return DictionaryRowMaxValueSize } func (dr *DictionaryRow) ValueTo(buf []byte) (int, error) { diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/upside_down/stats.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/upside_down/stats.go index 61cace27910f8c1b916f6673fab4b1aa0de97d49..4e488265b88292acb0810ec8a5d9ad9d0894424a 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/upside_down/stats.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/upside_down/stats.go @@ -12,15 +12,20 @@ package upside_down import ( "encoding/json" "sync/atomic" + + "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store" ) type indexStat struct { updates, deletes, batches, errors uint64 analysisTime, indexTime uint64 - kvStats json.Marshaler + termSearchersStarted uint64 + termSearchersFinished uint64 + numPlainTextBytesIndexed uint64 + i *UpsideDownCouch } -func (i *indexStat) MarshalJSON() ([]byte, error) { +func (i *indexStat) statsMap() map[string]interface{} { m := map[string]interface{}{} m["updates"] = atomic.LoadUint64(&i.updates) m["deletes"] = atomic.LoadUint64(&i.deletes) @@ -28,8 +33,18 @@ func (i *indexStat) MarshalJSON() ([]byte, error) { m["errors"] = atomic.LoadUint64(&i.errors) m["analysis_time"] = atomic.LoadUint64(&i.analysisTime) m["index_time"] = atomic.LoadUint64(&i.indexTime) - if i.kvStats != nil { - m["kv"] = i.kvStats + m["term_searchers_started"] = 
atomic.LoadUint64(&i.termSearchersStarted) + m["term_searchers_finished"] = atomic.LoadUint64(&i.termSearchersFinished) + m["num_plain_text_bytes_indexed"] = atomic.LoadUint64(&i.numPlainTextBytesIndexed) + + if o, ok := i.i.store.(store.KVStoreStats); ok { + m["kv"] = o.StatsMap() } + + return m +} + +func (i *indexStat) MarshalJSON() ([]byte, error) { + m := i.statsMap() return json.Marshal(m) } diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/upside_down/upside_down.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/upside_down/upside_down.go index aabee9f0e1b33c4020932ce3800a0f8cf2895d9c..3e4452f90ff1002e0ad243010293bc14a86978b4 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/index/upside_down/upside_down.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/index/upside_down/upside_down.go @@ -68,14 +68,15 @@ type docBackIndexRow struct { } func NewUpsideDownCouch(storeName string, storeConfig map[string]interface{}, analysisQueue *index.AnalysisQueue) (index.Index, error) { - return &UpsideDownCouch{ + rv := &UpsideDownCouch{ version: Version, fieldCache: index.NewFieldCache(), storeName: storeName, storeConfig: storeConfig, analysisQueue: analysisQueue, - stats: &indexStat{}, - }, nil + } + rv.stats = &indexStat{i: rv} + return rv, nil } func (udc *UpsideDownCouch) init(kvwriter store.KVWriter) (err error) { @@ -208,7 +209,7 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]Upsi mergeNum := len(dictionaryDeltas) mergeKeyBytes := 0 - mergeValBytes := mergeNum * 8 + mergeValBytes := mergeNum * DictionaryRowMaxValueSize for dictRowKey, _ := range dictionaryDeltas { mergeKeyBytes += len(dictRowKey) @@ -218,7 +219,7 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]Upsi totBytes := addKeyBytes + addValBytes + updateKeyBytes + updateValBytes + deleteKeyBytes + - mergeKeyBytes + mergeValBytes + 2*(mergeKeyBytes+mergeValBytes) buf, wb, err := 
writer.NewBatchEx(store.KVBatchOptions{ TotalBytes: totBytes, @@ -278,8 +279,8 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]Upsi for dictRowKey, delta := range dictionaryDeltas { dictRowKeyLen := copy(buf, dictRowKey) binary.LittleEndian.PutUint64(buf[dictRowKeyLen:], uint64(delta)) - wb.Merge(buf[:dictRowKeyLen], buf[dictRowKeyLen:dictRowKeyLen+8]) - buf = buf[dictRowKeyLen+8:] + wb.Merge(buf[:dictRowKeyLen], buf[dictRowKeyLen:dictRowKeyLen+DictionaryRowMaxValueSize]) + buf = buf[dictRowKeyLen+DictionaryRowMaxValueSize:] } // write out the batch @@ -310,10 +311,6 @@ func (udc *UpsideDownCouch) Open() (err error) { return } - if ss, ok := udc.store.(store.KVStoreStats); ok { - udc.stats.kvStats = ss.Stats() - } - // start a reader to look at the index var kvreader store.KVReader kvreader, err = udc.store.Reader() @@ -419,6 +416,7 @@ func (udc *UpsideDownCouch) Close() error { func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { // do analysis before acquiring write lock analysisStart := time.Now() + numPlainTextBytes := doc.NumPlainTextBytes() resultChan := make(chan *index.AnalysisResult) aw := index.NewAnalysisWork(udc, doc, resultChan) @@ -493,6 +491,7 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart))) if err == nil { atomic.AddUint64(&udc.stats.updates, 1) + atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes) } else { atomic.AddUint64(&udc.stats.errors, 1) } @@ -795,9 +794,11 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps)) var numUpdates uint64 + var numPlainTextBytes uint64 for _, doc := range batch.IndexOps { if doc != nil { numUpdates++ + numPlainTextBytes += doc.NumPlainTextBytes() } } @@ -963,6 +964,7 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { 
atomic.AddUint64(&udc.stats.updates, numUpdates) atomic.AddUint64(&udc.stats.deletes, docsDeleted) atomic.AddUint64(&udc.stats.batches, 1) + atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes) } else { atomic.AddUint64(&udc.stats.errors, 1) } @@ -1028,6 +1030,10 @@ func (udc *UpsideDownCouch) Stats() json.Marshaler { return udc.stats } +func (udc *UpsideDownCouch) StatsMap() map[string]interface{} { + return udc.stats.statsMap() +} + func (udc *UpsideDownCouch) Advanced() (store.KVStore, error) { return udc.store, nil } diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/index_alias_impl.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index_alias_impl.go index 3110391d0c721de3d9095bb13bbef5767104b28c..67df0af67ae94122510413e6808d9b744825ef78 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/index_alias_impl.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/index_alias_impl.go @@ -14,6 +14,8 @@ import ( "sync" "time" + "golang.org/x/net/context" + "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/document" "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index" "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store" @@ -122,16 +124,20 @@ func (i *indexAliasImpl) DocCount() (uint64, error) { for _, index := range i.indexes { otherCount, err := index.DocCount() - if err != nil { - return 0, err + if err == nil { + rv += otherCount } - rv += otherCount + // tolerate errors to produce partial counts } return rv, nil } func (i *indexAliasImpl) Search(req *SearchRequest) (*SearchResult, error) { + return i.SearchInContext(context.Background(), req) +} + +func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest) (*SearchResult, error) { i.mutex.RLock() defer i.mutex.RUnlock() @@ -145,10 +151,10 @@ func (i *indexAliasImpl) Search(req *SearchRequest) (*SearchResult, error) { // short circuit 
the simple case if len(i.indexes) == 1 { - return i.indexes[0].Search(req) + return i.indexes[0].SearchInContext(ctx, req) } - return MultiSearch(req, i.indexes...) + return MultiSearch(ctx, req, i.indexes...) } func (i *indexAliasImpl) Fields() ([]string, error) { @@ -333,6 +339,22 @@ func (i *indexAliasImpl) Stats() *IndexStat { return i.indexes[0].Stats() } +func (i *indexAliasImpl) StatsMap() map[string]interface{} { + i.mutex.RLock() + defer i.mutex.RUnlock() + + if !i.open { + return nil + } + + err := i.isAliasToSingleIndex() + if err != nil { + return nil + } + + return i.indexes[0].StatsMap() +} + func (i *indexAliasImpl) GetInternal(key []byte) ([]byte, error) { i.mutex.RLock() defer i.mutex.RUnlock() @@ -456,69 +478,96 @@ func createChildSearchRequest(req *SearchRequest) *SearchRequest { return &rv } +type asyncSearchResult struct { + Name string + Result *SearchResult + Err error +} + +func wrapSearch(ctx context.Context, in Index, req *SearchRequest) *asyncSearchResult { + rv := asyncSearchResult{Name: in.Name()} + rv.Result, rv.Err = in.SearchInContext(ctx, req) + return &rv +} + +func wrapSearchTimeout(ctx context.Context, in Index, req *SearchRequest) *asyncSearchResult { + reschan := make(chan *asyncSearchResult) + go func() { reschan <- wrapSearch(ctx, in, req) }() + select { + case res := <-reschan: + return res + case <-ctx.Done(): + return &asyncSearchResult{Name: in.Name(), Err: ctx.Err()} + } +} + // MultiSearch executes a SearchRequest across multiple // Index objects, then merges the results. 
-func MultiSearch(req *SearchRequest, indexes ...Index) (*SearchResult, error) { +func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*SearchResult, error) { + searchStart := time.Now() - results := make(chan *SearchResult) - errs := make(chan error) + asyncResults := make(chan *asyncSearchResult) // run search on each index in separate go routine var waitGroup sync.WaitGroup - var searchChildIndex = func(waitGroup *sync.WaitGroup, in Index, results chan *SearchResult, errs chan error) { - go func() { - defer waitGroup.Done() - childReq := createChildSearchRequest(req) - searchResult, err := in.Search(childReq) - if err != nil { - errs <- err - } else { - results <- searchResult - } - }() + var searchChildIndex = func(waitGroup *sync.WaitGroup, in Index, asyncResults chan *asyncSearchResult) { + childReq := createChildSearchRequest(req) + if ia, ok := in.(IndexAlias); ok { + // if the child index is another alias, trust it returns promptly on timeout/cancel + go func() { + defer waitGroup.Done() + asyncResults <- wrapSearch(ctx, ia, childReq) + }() + } else { + // if the child index is not an alias, enforce timeout here + go func() { + defer waitGroup.Done() + asyncResults <- wrapSearchTimeout(ctx, in, childReq) + }() + } } for _, in := range indexes { waitGroup.Add(1) - searchChildIndex(&waitGroup, in, results, errs) + searchChildIndex(&waitGroup, in, asyncResults) } // on another go routine, close after finished go func() { waitGroup.Wait() - close(results) - close(errs) + close(asyncResults) }() var sr *SearchResult - var err error - var result *SearchResult - ok := true - for ok { - select { - case result, ok = <-results: - if ok { - if sr == nil { - // first result - sr = result - } else { - // merge with previous - sr.Merge(result) - } - } - case err, ok = <-errs: - // for now stop on any error - // FIXME offer other behaviors - if err != nil { - return nil, err + indexErrors := make(map[string]error) + + for asr := range asyncResults 
{ + if asr.Err == nil { + if sr == nil { + // first result + sr = asr.Result + } else { + // merge with previous + sr.Merge(asr.Result) } + } else { + indexErrors[asr.Name] = asr.Err } } // merge just concatenated all the hits // now lets clean it up + // handle case where no results were successful + if sr == nil { + sr = &SearchResult{ + Status: &SearchStatus{ + Errors: make(map[string]error), + }, + } + } + // first sort it by score sort.Sort(sr.Hits) @@ -544,6 +593,18 @@ func MultiSearch(req *SearchRequest, indexes ...Index) (*SearchResult, error) { searchDuration := time.Since(searchStart) sr.Took = searchDuration + // fix up errors + if len(indexErrors) > 0 { + if sr.Status.Errors == nil { + sr.Status.Errors = make(map[string]error) + } + for indexName, indexErr := range indexErrors { + sr.Status.Errors[indexName] = indexErr + sr.Status.Total++ + sr.Status.Failed++ + } + } + return sr, nil } diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/index_impl.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index_impl.go index dd766011e87d55d6f7c711e988f3cb6648d39fd6..5723faae923cf845b24eeddb8e2f7f8f64f077ca 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/index_impl.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/index_impl.go @@ -17,6 +17,8 @@ import ( "sync/atomic" "time" + "golang.org/x/net/context" + "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/document" "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index" "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store" @@ -71,7 +73,6 @@ func newMemIndex(indexType string, mapping *IndexMapping) (*indexImpl, error) { if err != nil { return nil, err } - rv.stats.indexStat = rv.i.Stats() // now persist the mapping mappingBytes, err := json.Marshal(mapping) @@ -93,7 +94,7 @@ func newMemIndex(indexType string, mapping *IndexMapping) (*indexImpl, error) { func 
newIndexUsing(path string, mapping *IndexMapping, indexType string, kvstore string, kvconfig map[string]interface{}) (*indexImpl, error) { // first validate the mapping - err := mapping.validate() + err := mapping.Validate() if err != nil { return nil, err } @@ -107,12 +108,12 @@ func newIndexUsing(path string, mapping *IndexMapping, indexType string, kvstore } rv := indexImpl{ - path: path, - name: path, - m: mapping, - meta: newIndexMeta(indexType, kvstore, kvconfig), - stats: &IndexStat{}, + path: path, + name: path, + m: mapping, + meta: newIndexMeta(indexType, kvstore, kvconfig), } + rv.stats = &IndexStat{i: &rv} // at this point there is hope that we can be successful, so save index meta err = rv.meta.Save(path) if err != nil { @@ -139,7 +140,6 @@ func newIndexUsing(path string, mapping *IndexMapping, indexType string, kvstore } return nil, err } - rv.stats.indexStat = rv.i.Stats() // now persist the mapping mappingBytes, err := json.Marshal(mapping) @@ -161,10 +161,10 @@ func newIndexUsing(path string, mapping *IndexMapping, indexType string, kvstore func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *indexImpl, err error) { rv = &indexImpl{ - path: path, - name: path, - stats: &IndexStat{}, + path: path, + name: path, } + rv.stats = &IndexStat{i: rv} rv.meta, err = openIndexMeta(path) if err != nil { @@ -205,7 +205,6 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde } return nil, err } - rv.stats.indexStat = rv.i.Stats() // now load the mapping indexReader, err := rv.i.Reader() @@ -226,7 +225,7 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde var im IndexMapping err = json.Unmarshal(mappingBytes, &im) if err != nil { - return nil, err + return nil, fmt.Errorf("error parsing mapping JSON: %v\nmapping contents:\n%s", err, string(mappingBytes)) } // mark the index as open @@ -235,7 +234,7 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv 
*inde rv.open = true // validate the mapping - err = im.validate() + err = im.Validate() if err != nil { // note even if the mapping is invalid // we still return an open usable index @@ -364,6 +363,12 @@ func (i *indexImpl) DocCount() (uint64, error) { // Search executes a search request operation. // Returns a SearchResult object or an error. func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) { + return i.SearchInContext(context.Background(), req) +} + +// SearchInContext executes a search request operation within the provided +// Context. Returns a SearchResult object or an error. +func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr *SearchResult, err error) { i.mutex.RLock() defer i.mutex.RUnlock() @@ -424,7 +429,7 @@ func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) { collector.SetFacetsBuilder(facetsBuilder) } - err = collector.Collect(searcher) + err = collector.Collect(ctx, searcher) if err != nil { return nil, err } @@ -528,6 +533,12 @@ func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) { } return &SearchResult{ + Status: &SearchStatus{ + Total: 1, + Failed: 0, + Successful: 1, + Errors: make(map[string]error), + }, Request: req, Hits: hits, Total: collector.Total(), @@ -699,6 +710,10 @@ func (i *indexImpl) Stats() *IndexStat { return i.stats } +func (i *indexImpl) StatsMap() map[string]interface{} { + return i.stats.statsMap() +} + func (i *indexImpl) GetInternal(key []byte) (val []byte, err error) { i.mutex.RLock() defer i.mutex.RUnlock() diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/index_meta.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index_meta.go index ef0a4358bb96ab9c092a25fa9c02a03a066c43d2..ba91048080060ed52a1e22c869373eecca518e98 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/index_meta.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/index_meta.go @@ -56,9 +56,12 @@ func 
openIndexMeta(path string) (*indexMeta, error) { func (i *indexMeta) Save(path string) (err error) { indexMetaPath := indexMetaPath(path) // ensure any necessary parent directories exist - err = os.Mkdir(path, 0700) + err = os.MkdirAll(path, 0700) if err != nil { - return ErrorIndexPathExists + if os.IsExist(err) { + return ErrorIndexPathExists + } + return err } metaBytes, err := json.Marshal(i) if err != nil { diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/index_stats.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/index_stats.go index d63f8568f5e8feb268910f269257f96bbf2cb72a..17aac350ac2bc25d9f892895a5ccf13a6ec76c83 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/index_stats.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/index_stats.go @@ -16,16 +16,21 @@ import ( ) type IndexStat struct { - indexStat json.Marshaler searches uint64 searchTime uint64 + i *indexImpl } -func (is *IndexStat) MarshalJSON() ([]byte, error) { +func (is *IndexStat) statsMap() map[string]interface{} { m := map[string]interface{}{} - m["index"] = is.indexStat + m["index"] = is.i.i.StatsMap() m["searches"] = atomic.LoadUint64(&is.searches) m["search_time"] = atomic.LoadUint64(&is.searchTime) + return m +} + +func (is *IndexStat) MarshalJSON() ([]byte, error) { + m := is.statsMap() return json.Marshal(m) } diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/mapping_document.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/mapping_document.go index e933aad06736783720a46402c153b38e9aae3ca9..5587a34d0132e053e656eb0953fc38aa43b78bb5 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/mapping_document.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/mapping_document.go @@ -39,7 +39,7 @@ type DocumentMapping struct { DefaultAnalyzer string `json:"default_analyzer"` } -func (dm *DocumentMapping) validate(cache *registry.Cache) error { +func (dm *DocumentMapping) Validate(cache *registry.Cache) error { 
var err error if dm.DefaultAnalyzer != "" { _, err := cache.AnalyzerNamed(dm.DefaultAnalyzer) @@ -48,7 +48,7 @@ func (dm *DocumentMapping) validate(cache *registry.Cache) error { } } for _, property := range dm.Properties { - err = property.validate(cache) + err = property.Validate(cache) if err != nil { return err } @@ -75,58 +75,105 @@ func (dm *DocumentMapping) validate(cache *registry.Cache) error { return nil } +// analyzerNameForPath attempts to first find the field +// described by this path, then returns the analyzer +// configured for that field func (dm *DocumentMapping) analyzerNameForPath(path string) string { + field := dm.fieldDescribedByPath(path) + if field != nil { + return field.Analyzer + } + return "" +} + +func (dm *DocumentMapping) fieldDescribedByPath(path string) *FieldMapping { pathElements := decodePath(path) - last := false - current := dm -OUTER: - for i, pathElement := range pathElements { - if i == len(pathElements)-1 { - last = true + if len(pathElements) > 1 { + // easy case, there is more than 1 path element remaining + // the next path element must match a property name + // at this level + for propName, subDocMapping := range dm.Properties { + if propName == pathElements[0] { + return subDocMapping.fieldDescribedByPath(encodePath(pathElements[1:])) + } } - for name, subDocMapping := range current.Properties { - for _, field := range subDocMapping.Fields { - if field.Name == "" && name == pathElement { - if last { - return field.Analyzer + } else { + // just 1 path elememnt + // first look for property name with empty field + for propName, subDocMapping := range dm.Properties { + if propName == pathElements[0] { + // found property name match, now look at its fields + for _, field := range subDocMapping.Fields { + if field.Name == "" || field.Name == pathElements[0] { + // match + return field } - current = subDocMapping - continue OUTER - } else if field.Name == pathElement { - if last { - return field.Analyzer + } + } + } + // 
next, walk the properties again, looking for field overriding the name + for propName, subDocMapping := range dm.Properties { + if propName != pathElements[0] { + // property name isn't a match, but field name could override it + for _, field := range subDocMapping.Fields { + if field.Name == pathElements[0] { + return field } - current = subDocMapping - continue OUTER } } } - return "" } - return "" + + return nil } +// documentMappingForPath only returns EXACT matches for a sub document +// or for an explicitly mapped field, if you want to find the +// closest document mapping to a field not explicitly mapped +// use closestDocMapping func (dm *DocumentMapping) documentMappingForPath(path string) *DocumentMapping { pathElements := decodePath(path) current := dm OUTER: - for _, pathElement := range pathElements { + for i, pathElement := range pathElements { for name, subDocMapping := range current.Properties { if name == pathElement { current = subDocMapping continue OUTER } } - for _, field := range current.Fields { - if field.Name == pathElement { - continue OUTER + // no subDocMapping matches this pathElement + // only if this is the last element check for field name + if i == len(pathElements)-1 { + for _, field := range current.Fields { + if field.Name == pathElement { + break + } } } + return nil } return current } +// closestDocMapping findest the most specific document mapping that matches +// part of the provided path +func (dm *DocumentMapping) closestDocMapping(path string) *DocumentMapping { + pathElements := decodePath(path) + current := dm +OUTER: + for _, pathElement := range pathElements { + for name, subDocMapping := range current.Properties { + if name == pathElement { + current = subDocMapping + continue OUTER + } + } + } + return current +} + // NewDocumentMapping returns a new document mapping // with all the default values. 
func NewDocumentMapping() *DocumentMapping { @@ -313,6 +360,7 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string, pathString := encodePath(path) // look to see if there is a mapping for this field subDocMapping := dm.documentMappingForPath(pathString) + closestDocMapping := dm.closestDocMapping(pathString) // check to see if we even need to do further processing if subDocMapping != nil && !subDocMapping.Enabled { @@ -333,7 +381,7 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string, for _, fieldMapping := range subDocMapping.Fields { fieldMapping.processString(propertyValueString, pathString, path, indexes, context) } - } else if dm.Dynamic { + } else if closestDocMapping.Dynamic { // automatic indexing behavior // first see if it can be parsed by the default date parser @@ -342,11 +390,11 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string, parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString) if err != nil { // index as text - fieldMapping := newTextFieldMappingDynamic() + fieldMapping := newTextFieldMappingDynamic(context.im) fieldMapping.processString(propertyValueString, pathString, path, indexes, context) } else { // index as datetime - fieldMapping := newDateTimeFieldMappingDynamic() + fieldMapping := newDateTimeFieldMappingDynamic(context.im) fieldMapping.processTime(parsedDateTime, pathString, path, indexes, context) } } @@ -364,9 +412,9 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string, for _, fieldMapping := range subDocMapping.Fields { fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context) } - } else if dm.Dynamic { + } else if closestDocMapping.Dynamic { // automatic indexing behavior - fieldMapping := newNumericFieldMappingDynamic() + fieldMapping := newNumericFieldMappingDynamic(context.im) fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context) } case 
reflect.Bool: @@ -376,9 +424,9 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string, for _, fieldMapping := range subDocMapping.Fields { fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context) } - } else if dm.Dynamic { + } else if closestDocMapping.Dynamic { // automatic indexing behavior - fieldMapping := newBooleanFieldMappingDynamic() + fieldMapping := newBooleanFieldMappingDynamic(context.im) fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context) } case reflect.Struct: @@ -390,8 +438,8 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string, for _, fieldMapping := range subDocMapping.Fields { fieldMapping.processTime(property, pathString, path, indexes, context) } - } else if dm.Dynamic { - fieldMapping := newDateTimeFieldMappingDynamic() + } else if closestDocMapping.Dynamic { + fieldMapping := newDateTimeFieldMappingDynamic(context.im) fieldMapping.processTime(property, pathString, path, indexes, context) } default: diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/mapping_field.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/mapping_field.go index 5c23a948b12f8ae19ea1848bef1e915588c44711..6971c52f99560149d860f55e2a53e12f7209d35c 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/mapping_field.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/mapping_field.go @@ -61,10 +61,10 @@ func NewTextFieldMapping() *FieldMapping { } } -func newTextFieldMappingDynamic() *FieldMapping { +func newTextFieldMappingDynamic(im *IndexMapping) *FieldMapping { rv := NewTextFieldMapping() - rv.Store = StoreDynamic - rv.Index = IndexDynamic + rv.Store = im.StoreDynamic + rv.Index = im.IndexDynamic return rv } @@ -78,10 +78,10 @@ func NewNumericFieldMapping() *FieldMapping { } } -func newNumericFieldMappingDynamic() *FieldMapping { +func newNumericFieldMappingDynamic(im *IndexMapping) *FieldMapping { rv := NewNumericFieldMapping() - 
rv.Store = StoreDynamic - rv.Index = IndexDynamic + rv.Store = im.StoreDynamic + rv.Index = im.IndexDynamic return rv } @@ -95,10 +95,10 @@ func NewDateTimeFieldMapping() *FieldMapping { } } -func newDateTimeFieldMappingDynamic() *FieldMapping { +func newDateTimeFieldMappingDynamic(im *IndexMapping) *FieldMapping { rv := NewDateTimeFieldMapping() - rv.Store = StoreDynamic - rv.Index = IndexDynamic + rv.Store = im.StoreDynamic + rv.Index = im.IndexDynamic return rv } @@ -112,10 +112,10 @@ func NewBooleanFieldMapping() *FieldMapping { } } -func newBooleanFieldMappingDynamic() *FieldMapping { +func newBooleanFieldMappingDynamic(im *IndexMapping) *FieldMapping { rv := NewBooleanFieldMapping() - rv.Store = StoreDynamic - rv.Index = IndexDynamic + rv.Store = im.StoreDynamic + rv.Index = im.IndexDynamic return rv } diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/mapping_index.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/mapping_index.go index e42ca3b0311498b397e039ec27fb0e38e504fae1..bb304ea73596c5038843825bb7659e93da500212 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/mapping_index.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/mapping_index.go @@ -130,6 +130,8 @@ type IndexMapping struct { DefaultDateTimeParser string `json:"default_datetime_parser"` DefaultField string `json:"default_field"` ByteArrayConverter string `json:"byte_array_converter"` + StoreDynamic bool `json:"store_dynamic"` + IndexDynamic bool `json:"index_dynamic"` CustomAnalysis *customAnalysis `json:"analysis,omitempty"` cache *registry.Cache } @@ -233,6 +235,8 @@ func NewIndexMapping() *IndexMapping { DefaultDateTimeParser: defaultDateTimeParser, DefaultField: defaultField, ByteArrayConverter: defaultByteArrayConverter, + IndexDynamic: IndexDynamic, + StoreDynamic: StoreDynamic, CustomAnalysis: newCustomAnalysis(), cache: registry.NewCache(), } @@ -240,7 +244,7 @@ func NewIndexMapping() *IndexMapping { // Validate will walk the entire 
structure ensuring the following // explicitly named and default analyzers can be built -func (im *IndexMapping) validate() error { +func (im *IndexMapping) Validate() error { _, err := im.cache.AnalyzerNamed(im.DefaultAnalyzer) if err != nil { return err @@ -249,12 +253,12 @@ func (im *IndexMapping) validate() error { if err != nil { return err } - err = im.DefaultMapping.validate(im.cache) + err = im.DefaultMapping.Validate(im.cache) if err != nil { return err } for _, docMapping := range im.TypeMapping { - err = docMapping.validate(im.cache) + err = docMapping.Validate(im.cache) if err != nil { return err } @@ -295,6 +299,8 @@ func (im *IndexMapping) UnmarshalJSON(data []byte) error { im.ByteArrayConverter = defaultByteArrayConverter im.DefaultMapping = NewDocumentMapping() im.TypeMapping = make(map[string]*DocumentMapping) + im.StoreDynamic = StoreDynamic + im.IndexDynamic = IndexDynamic var invalidKeys []string for k, v := range tmp { @@ -344,6 +350,16 @@ func (im *IndexMapping) UnmarshalJSON(data []byte) error { if err != nil { return err } + case "store_dynamic": + err := json.Unmarshal(v, &im.StoreDynamic) + if err != nil { + return err + } + case "index_dynamic": + err := json.Unmarshal(v, &im.IndexDynamic) + if err != nil { + return err + } default: invalidKeys = append(invalidKeys, k) } diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/query.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/query.go index 27e0b6640926184a986ea0f8b3c35d2e93422978..a9695f52ed1587a8656cfb1a181acb45f51c5534 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/query.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/query.go @@ -192,6 +192,9 @@ func ParseQuery(input []byte) (Query, error) { if err != nil { return nil, err } + if rv.Boost() == 0 { + rv.SetBoost(1) + } return &rv, nil } _, hasWildcard := tmp["wildcard"] @@ -201,6 +204,9 @@ func ParseQuery(input []byte) (Query, error) { if err != nil { return nil, err } + if rv.Boost() == 
0 { + rv.SetBoost(1) + } return &rv, nil } _, hasMatchAll := tmp["match_all"] diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/analyzer.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/analyzer.go index adeecfec76d037d1eb122dcef6877ce28d43a798..de95e0562aab22be5b1bd757e877f33245a5f17e 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/analyzer.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/analyzer.go @@ -25,40 +25,47 @@ func RegisterAnalyzer(name string, constructor AnalyzerConstructor) { type AnalyzerConstructor func(config map[string]interface{}, cache *Cache) (*analysis.Analyzer, error) type AnalyzerRegistry map[string]AnalyzerConstructor -type AnalyzerCache map[string]*analysis.Analyzer -func (c AnalyzerCache) AnalyzerNamed(name string, cache *Cache) (*analysis.Analyzer, error) { - analyzer, cached := c[name] - if cached { - return analyzer, nil +type AnalyzerCache struct { + *ConcurrentCache +} + +func NewAnalyzerCache() *AnalyzerCache { + return &AnalyzerCache{ + NewConcurrentCache(), } - analyzerConstructor, registered := analyzers[name] +} + +func AnalyzerBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) { + cons, registered := analyzers[name] if !registered { return nil, fmt.Errorf("no analyzer with name or type '%s' registered", name) } - analyzer, err := analyzerConstructor(nil, cache) + analyzer, err := cons(config, cache) if err != nil { return nil, fmt.Errorf("error building analyzer: %v", err) } - c[name] = analyzer return analyzer, nil } -func (c AnalyzerCache) DefineAnalyzer(name string, typ string, config map[string]interface{}, cache *Cache) (*analysis.Analyzer, error) { - _, cached := c[name] - if cached { - return nil, fmt.Errorf("analyzer named '%s' already defined", name) - } - analyzerConstructor, registered := analyzers[typ] - if !registered { - return nil, fmt.Errorf("no analyzer type '%s' registered", typ) +func (c 
*AnalyzerCache) AnalyzerNamed(name string, cache *Cache) (*analysis.Analyzer, error) { + item, err := c.ItemNamed(name, cache, AnalyzerBuild) + if err != nil { + return nil, err } - analyzer, err := analyzerConstructor(config, cache) + return item.(*analysis.Analyzer), nil +} + +func (c *AnalyzerCache) DefineAnalyzer(name string, typ string, config map[string]interface{}, cache *Cache) (*analysis.Analyzer, error) { + item, err := c.DefineItem(name, typ, config, cache, AnalyzerBuild) if err != nil { - return nil, fmt.Errorf("error building analyzer: %v", err) + if err == ErrAlreadyDefined { + return nil, fmt.Errorf("analyzer named '%s' already defined", name) + } else { + return nil, err + } } - c[name] = analyzer - return analyzer, nil + return item.(*analysis.Analyzer), nil } func AnalyzerTypesAndInstances() ([]string, []string) { diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/cache.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/cache.go new file mode 100644 index 0000000000000000000000000000000000000000..284a5a209a2009320cf99a45cbf3abf34419d6b3 --- /dev/null +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/cache.go @@ -0,0 +1,82 @@ +// Copyright (c) 2016 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+ +package registry + +import ( + "fmt" + "sync" +) + +var ErrAlreadyDefined = fmt.Errorf("item already defined") + +type CacheBuild func(name string, config map[string]interface{}, cache *Cache) (interface{}, error) + +type ConcurrentCache struct { + mutex sync.RWMutex + data map[string]interface{} +} + +func NewConcurrentCache() *ConcurrentCache { + return &ConcurrentCache{ + data: make(map[string]interface{}), + } +} + +func (c *ConcurrentCache) ItemNamed(name string, cache *Cache, build CacheBuild) (interface{}, error) { + c.mutex.RLock() + item, cached := c.data[name] + if cached { + c.mutex.RUnlock() + return item, nil + } + // give up read lock + c.mutex.RUnlock() + // try to build it + newItem, err := build(name, nil, cache) + if err != nil { + return nil, err + } + // acquire write lock + c.mutex.Lock() + defer c.mutex.Unlock() + // check again because it could have been created while trading locks + item, cached = c.data[name] + if cached { + return item, nil + } + c.data[name] = newItem + return newItem, nil +} + +func (c *ConcurrentCache) DefineItem(name string, typ string, config map[string]interface{}, cache *Cache, build CacheBuild) (interface{}, error) { + c.mutex.RLock() + _, cached := c.data[name] + if cached { + c.mutex.RUnlock() + return nil, ErrAlreadyDefined + } + // give up read lock so others lookups can proceed + c.mutex.RUnlock() + // really not there, try to build it + newItem, err := build(typ, config, cache) + if err != nil { + return nil, err + } + // now we've built it, acquire lock + c.mutex.Lock() + defer c.mutex.Unlock() + // check again because it could have been created while trading locks + _, cached = c.data[name] + if cached { + return nil, ErrAlreadyDefined + } + c.data[name] = newItem + return newItem, nil +} diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/char_filter.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/char_filter.go index 
d71998a05e3c4c77ad35c28ed6f775970046a0af..e891f54fc58f3f5988185ab0d826016417d954bb 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/char_filter.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/char_filter.go @@ -25,40 +25,47 @@ func RegisterCharFilter(name string, constructor CharFilterConstructor) { type CharFilterConstructor func(config map[string]interface{}, cache *Cache) (analysis.CharFilter, error) type CharFilterRegistry map[string]CharFilterConstructor -type CharFilterCache map[string]analysis.CharFilter -func (c CharFilterCache) CharFilterNamed(name string, cache *Cache) (analysis.CharFilter, error) { - charFilter, cached := c[name] - if cached { - return charFilter, nil +type CharFilterCache struct { + *ConcurrentCache +} + +func NewCharFilterCache() *CharFilterCache { + return &CharFilterCache{ + NewConcurrentCache(), } - charFilterConstructor, registered := charFilters[name] +} + +func CharFilterBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) { + cons, registered := charFilters[name] if !registered { return nil, fmt.Errorf("no char filter with name or type '%s' registered", name) } - charFilter, err := charFilterConstructor(nil, cache) + charFilter, err := cons(config, cache) if err != nil { return nil, fmt.Errorf("error building char filter: %v", err) } - c[name] = charFilter return charFilter, nil } -func (c CharFilterCache) DefineCharFilter(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.CharFilter, error) { - _, cached := c[name] - if cached { - return nil, fmt.Errorf("char filter named '%s' already defined", name) - } - charFilterConstructor, registered := charFilters[typ] - if !registered { - return nil, fmt.Errorf("no char filter type '%s' registered", typ) +func (c *CharFilterCache) CharFilterNamed(name string, cache *Cache) (analysis.CharFilter, error) { + item, err := c.ItemNamed(name, cache, CharFilterBuild) + if err != nil { + 
return nil, err } - charFilter, err := charFilterConstructor(config, cache) + return item.(analysis.CharFilter), nil +} + +func (c *CharFilterCache) DefineCharFilter(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.CharFilter, error) { + item, err := c.DefineItem(name, typ, config, cache, CharFilterBuild) if err != nil { - return nil, fmt.Errorf("error building char filter: %v", err) + if err == ErrAlreadyDefined { + return nil, fmt.Errorf("char filter named '%s' already defined", name) + } else { + return nil, err + } } - c[name] = charFilter - return charFilter, nil + return item.(analysis.CharFilter), nil } func CharFilterTypesAndInstances() ([]string, []string) { diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/datetime_parser.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/datetime_parser.go index cfb38b2ccb28289399909aa6edff8da7569e2341..b2e47d3a2148ec6828a6283412e10030223cce02 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/datetime_parser.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/datetime_parser.go @@ -25,40 +25,47 @@ func RegisterDateTimeParser(name string, constructor DateTimeParserConstructor) type DateTimeParserConstructor func(config map[string]interface{}, cache *Cache) (analysis.DateTimeParser, error) type DateTimeParserRegistry map[string]DateTimeParserConstructor -type DateTimeParserCache map[string]analysis.DateTimeParser -func (c DateTimeParserCache) DateTimeParserNamed(name string, cache *Cache) (analysis.DateTimeParser, error) { - dateTimeParser, cached := c[name] - if cached { - return dateTimeParser, nil +type DateTimeParserCache struct { + *ConcurrentCache +} + +func NewDateTimeParserCache() *DateTimeParserCache { + return &DateTimeParserCache{ + NewConcurrentCache(), } - dateTimeParserConstructor, registered := dateTimeParsers[name] +} + +func DateTimeParserBuild(name string, config map[string]interface{}, cache 
*Cache) (interface{}, error) { + cons, registered := dateTimeParsers[name] if !registered { return nil, fmt.Errorf("no date time parser with name or type '%s' registered", name) } - dateTimeParser, err := dateTimeParserConstructor(nil, cache) + dateTimeParser, err := cons(config, cache) if err != nil { - return nil, fmt.Errorf("error building date time parse: %v", err) + return nil, fmt.Errorf("error building date time parser: %v", err) } - c[name] = dateTimeParser return dateTimeParser, nil } -func (c DateTimeParserCache) DefineDateTimeParser(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.DateTimeParser, error) { - _, cached := c[name] - if cached { - return nil, fmt.Errorf("date time parser named '%s' already defined", name) - } - dateTimeParserConstructor, registered := dateTimeParsers[typ] - if !registered { - return nil, fmt.Errorf("no date time parser type '%s' registered", typ) +func (c *DateTimeParserCache) DateTimeParserNamed(name string, cache *Cache) (analysis.DateTimeParser, error) { + item, err := c.ItemNamed(name, cache, DateTimeParserBuild) + if err != nil { + return nil, err } - dateTimeParser, err := dateTimeParserConstructor(config, cache) + return item.(analysis.DateTimeParser), nil +} + +func (c *DateTimeParserCache) DefineDateTimeParser(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.DateTimeParser, error) { + item, err := c.DefineItem(name, typ, config, cache, DateTimeParserBuild) if err != nil { - return nil, fmt.Errorf("error building date time parser: %v", err) + if err == ErrAlreadyDefined { + return nil, fmt.Errorf("date time parser named '%s' already defined", name) + } else { + return nil, err + } } - c[name] = dateTimeParser - return dateTimeParser, nil + return item.(analysis.DateTimeParser), nil } func DateTimeParserTypesAndInstances() ([]string, []string) { diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/fragment_formatter.go 
b/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/fragment_formatter.go index 0fa786218710657794ca880533cf82f1adf6a88d..977e9ff86856145bf99fac5492b09ef7798e6e50 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/fragment_formatter.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/fragment_formatter.go @@ -25,40 +25,47 @@ func RegisterFragmentFormatter(name string, constructor FragmentFormatterConstru type FragmentFormatterConstructor func(config map[string]interface{}, cache *Cache) (highlight.FragmentFormatter, error) type FragmentFormatterRegistry map[string]FragmentFormatterConstructor -type FragmentFormatterCache map[string]highlight.FragmentFormatter -func (c FragmentFormatterCache) FragmentFormatterNamed(name string, cache *Cache) (highlight.FragmentFormatter, error) { - fragmentFormatter, cached := c[name] - if cached { - return fragmentFormatter, nil +type FragmentFormatterCache struct { + *ConcurrentCache +} + +func NewFragmentFormatterCache() *FragmentFormatterCache { + return &FragmentFormatterCache{ + NewConcurrentCache(), } - fragmentFormatterConstructor, registered := fragmentFormatters[name] +} + +func FragmentFormatterBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) { + cons, registered := fragmentFormatters[name] if !registered { return nil, fmt.Errorf("no fragment formatter with name or type '%s' registered", name) } - fragmentFormatter, err := fragmentFormatterConstructor(nil, cache) + fragmentFormatter, err := cons(config, cache) if err != nil { return nil, fmt.Errorf("error building fragment formatter: %v", err) } - c[name] = fragmentFormatter return fragmentFormatter, nil } -func (c FragmentFormatterCache) DefineFragmentFormatter(name string, typ string, config map[string]interface{}, cache *Cache) (highlight.FragmentFormatter, error) { - _, cached := c[name] - if cached { - return nil, fmt.Errorf("fragment formatter named '%s' already defined", name) - } 
- fragmentFormatterConstructor, registered := fragmentFormatters[typ] - if !registered { - return nil, fmt.Errorf("no fragment formatter type '%s' registered", typ) +func (c *FragmentFormatterCache) FragmentFormatterNamed(name string, cache *Cache) (highlight.FragmentFormatter, error) { + item, err := c.ItemNamed(name, cache, FragmentFormatterBuild) + if err != nil { + return nil, err } - fragmentFormatter, err := fragmentFormatterConstructor(config, cache) + return item.(highlight.FragmentFormatter), nil +} + +func (c *FragmentFormatterCache) DefineFragmentFormatter(name string, typ string, config map[string]interface{}, cache *Cache) (highlight.FragmentFormatter, error) { + item, err := c.DefineItem(name, typ, config, cache, FragmentFormatterBuild) if err != nil { - return nil, fmt.Errorf("error building fragment formatter: %v", err) + if err == ErrAlreadyDefined { + return nil, fmt.Errorf("fragment formatter named '%s' already defined", name) + } else { + return nil, err + } } - c[name] = fragmentFormatter - return fragmentFormatter, nil + return item.(highlight.FragmentFormatter), nil } func FragmentFormatterTypesAndInstances() ([]string, []string) { diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/fragmenter.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/fragmenter.go index d604f7729fe953a4774cef215ba9db91fcbf74d3..a74d7d6eb54736234f7a2a247210be4d10511a83 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/fragmenter.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/fragmenter.go @@ -25,40 +25,47 @@ func RegisterFragmenter(name string, constructor FragmenterConstructor) { type FragmenterConstructor func(config map[string]interface{}, cache *Cache) (highlight.Fragmenter, error) type FragmenterRegistry map[string]FragmenterConstructor -type FragmenterCache map[string]highlight.Fragmenter -func (c FragmenterCache) FragmenterNamed(name string, cache *Cache) (highlight.Fragmenter, 
error) { - fragmenter, cached := c[name] - if cached { - return fragmenter, nil +type FragmenterCache struct { + *ConcurrentCache +} + +func NewFragmenterCache() *FragmenterCache { + return &FragmenterCache{ + NewConcurrentCache(), } - fragmenterConstructor, registered := fragmenters[name] +} + +func FragmenterBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) { + cons, registered := fragmenters[name] if !registered { return nil, fmt.Errorf("no fragmenter with name or type '%s' registered", name) } - fragmenter, err := fragmenterConstructor(nil, cache) + fragmenter, err := cons(config, cache) if err != nil { return nil, fmt.Errorf("error building fragmenter: %v", err) } - c[name] = fragmenter return fragmenter, nil } -func (c FragmenterCache) DefineFragmenter(name string, typ string, config map[string]interface{}, cache *Cache) (highlight.Fragmenter, error) { - _, cached := c[name] - if cached { - return nil, fmt.Errorf("fragmenter named '%s' already defined", name) - } - fragmenterConstructor, registered := fragmenters[typ] - if !registered { - return nil, fmt.Errorf("no fragmenter type '%s' registered", typ) +func (c *FragmenterCache) FragmenterNamed(name string, cache *Cache) (highlight.Fragmenter, error) { + item, err := c.ItemNamed(name, cache, FragmenterBuild) + if err != nil { + return nil, err } - fragmenter, err := fragmenterConstructor(config, cache) + return item.(highlight.Fragmenter), nil +} + +func (c *FragmenterCache) DefineFragmenter(name string, typ string, config map[string]interface{}, cache *Cache) (highlight.Fragmenter, error) { + item, err := c.DefineItem(name, typ, config, cache, FragmenterBuild) if err != nil { - return nil, fmt.Errorf("error building fragmenter: %v", err) + if err == ErrAlreadyDefined { + return nil, fmt.Errorf("fragmenter named '%s' already defined", name) + } else { + return nil, err + } } - c[name] = fragmenter - return fragmenter, nil + return item.(highlight.Fragmenter), nil } func 
FragmenterTypesAndInstances() ([]string, []string) { diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/highlighter.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/highlighter.go index 9db3c2298944767afd45eedc1d2ef97d052b0d6c..2a862f7ee8279132ef35e3f35c0d0d565e3b1b0b 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/highlighter.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/highlighter.go @@ -25,40 +25,47 @@ func RegisterHighlighter(name string, constructor HighlighterConstructor) { type HighlighterConstructor func(config map[string]interface{}, cache *Cache) (highlight.Highlighter, error) type HighlighterRegistry map[string]HighlighterConstructor -type HighlighterCache map[string]highlight.Highlighter -func (c HighlighterCache) HighlighterNamed(name string, cache *Cache) (highlight.Highlighter, error) { - highlighter, cached := c[name] - if cached { - return highlighter, nil +type HighlighterCache struct { + *ConcurrentCache +} + +func NewHighlighterCache() *HighlighterCache { + return &HighlighterCache{ + NewConcurrentCache(), } - highlighterConstructor, registered := highlighters[name] +} + +func HighlighterBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) { + cons, registered := highlighters[name] if !registered { return nil, fmt.Errorf("no highlighter with name or type '%s' registered", name) } - highlighter, err := highlighterConstructor(nil, cache) + highlighter, err := cons(config, cache) if err != nil { return nil, fmt.Errorf("error building highlighter: %v", err) } - c[name] = highlighter return highlighter, nil } -func (c HighlighterCache) DefineHighlighter(name string, typ string, config map[string]interface{}, cache *Cache) (highlight.Highlighter, error) { - _, cached := c[name] - if cached { - return nil, fmt.Errorf("highlighter named '%s' already defined", name) - } - highlighterConstructor, registered := highlighters[typ] - if 
!registered { - return nil, fmt.Errorf("no highlighter type '%s' registered", typ) +func (c *HighlighterCache) HighlighterNamed(name string, cache *Cache) (highlight.Highlighter, error) { + item, err := c.ItemNamed(name, cache, HighlighterBuild) + if err != nil { + return nil, err } - highlighter, err := highlighterConstructor(config, cache) + return item.(highlight.Highlighter), nil +} + +func (c *HighlighterCache) DefineHighlighter(name string, typ string, config map[string]interface{}, cache *Cache) (highlight.Highlighter, error) { + item, err := c.DefineItem(name, typ, config, cache, HighlighterBuild) if err != nil { - return nil, fmt.Errorf("error building highlighter: %v", err) + if err == ErrAlreadyDefined { + return nil, fmt.Errorf("highlighter named '%s' already defined", name) + } else { + return nil, err + } } - c[name] = highlighter - return highlighter, nil + return item.(highlight.Highlighter), nil } func HighlighterTypesAndInstances() ([]string, []string) { diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/registry.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/registry.go index 63428a4fe823d90b527ebabe758dcaf57c4b2f37..c99a3743e1bdf4827421deb2e7e42e92d2ef29cb 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/registry.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/registry.go @@ -35,28 +35,28 @@ var analyzers = make(AnalyzerRegistry, 0) var dateTimeParsers = make(DateTimeParserRegistry, 0) type Cache struct { - CharFilters CharFilterCache - Tokenizers TokenizerCache - TokenMaps TokenMapCache - TokenFilters TokenFilterCache - Analyzers AnalyzerCache - DateTimeParsers DateTimeParserCache - FragmentFormatters FragmentFormatterCache - Fragmenters FragmenterCache - Highlighters HighlighterCache + CharFilters *CharFilterCache + Tokenizers *TokenizerCache + TokenMaps *TokenMapCache + TokenFilters *TokenFilterCache + Analyzers *AnalyzerCache + DateTimeParsers 
*DateTimeParserCache + FragmentFormatters *FragmentFormatterCache + Fragmenters *FragmenterCache + Highlighters *HighlighterCache } func NewCache() *Cache { return &Cache{ - CharFilters: make(CharFilterCache, 0), - Tokenizers: make(TokenizerCache, 0), - TokenMaps: make(TokenMapCache, 0), - TokenFilters: make(TokenFilterCache, 0), - Analyzers: make(AnalyzerCache, 0), - DateTimeParsers: make(DateTimeParserCache, 0), - FragmentFormatters: make(FragmentFormatterCache, 0), - Fragmenters: make(FragmenterCache, 0), - Highlighters: make(HighlighterCache, 0), + CharFilters: NewCharFilterCache(), + Tokenizers: NewTokenizerCache(), + TokenMaps: NewTokenMapCache(), + TokenFilters: NewTokenFilterCache(), + Analyzers: NewAnalyzerCache(), + DateTimeParsers: NewDateTimeParserCache(), + FragmentFormatters: NewFragmentFormatterCache(), + Fragmenters: NewFragmenterCache(), + Highlighters: NewHighlighterCache(), } } diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/token_filter.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/token_filter.go index 9f712b022aed31d50a3b449d85939c439d38b234..b7dfdcbce3376ef584935287a15b676632cf93aa 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/token_filter.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/token_filter.go @@ -25,40 +25,47 @@ func RegisterTokenFilter(name string, constructor TokenFilterConstructor) { type TokenFilterConstructor func(config map[string]interface{}, cache *Cache) (analysis.TokenFilter, error) type TokenFilterRegistry map[string]TokenFilterConstructor -type TokenFilterCache map[string]analysis.TokenFilter -func (c TokenFilterCache) TokenFilterNamed(name string, cache *Cache) (analysis.TokenFilter, error) { - tokenFilter, cached := c[name] - if cached { - return tokenFilter, nil +type TokenFilterCache struct { + *ConcurrentCache +} + +func NewTokenFilterCache() *TokenFilterCache { + return &TokenFilterCache{ + NewConcurrentCache(), } - 
tokenFilterConstructor, registered := tokenFilters[name] +} + +func TokenFilterBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) { + cons, registered := tokenFilters[name] if !registered { return nil, fmt.Errorf("no token filter with name or type '%s' registered", name) } - tokenFilter, err := tokenFilterConstructor(nil, cache) + tokenFilter, err := cons(config, cache) if err != nil { return nil, fmt.Errorf("error building token filter: %v", err) } - c[name] = tokenFilter return tokenFilter, nil } -func (c TokenFilterCache) DefineTokenFilter(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.TokenFilter, error) { - _, cached := c[name] - if cached { - return nil, fmt.Errorf("token filter named '%s' already defined", name) - } - tokenFilterConstructor, registered := tokenFilters[typ] - if !registered { - return nil, fmt.Errorf("no token filter type '%s' registered", typ) +func (c *TokenFilterCache) TokenFilterNamed(name string, cache *Cache) (analysis.TokenFilter, error) { + item, err := c.ItemNamed(name, cache, TokenFilterBuild) + if err != nil { + return nil, err } - tokenFilter, err := tokenFilterConstructor(config, cache) + return item.(analysis.TokenFilter), nil +} + +func (c *TokenFilterCache) DefineTokenFilter(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.TokenFilter, error) { + item, err := c.DefineItem(name, typ, config, cache, TokenFilterBuild) if err != nil { - return nil, fmt.Errorf("error building token filter: %v", err) + if err == ErrAlreadyDefined { + return nil, fmt.Errorf("token filter named '%s' already defined", name) + } else { + return nil, err + } } - c[name] = tokenFilter - return tokenFilter, nil + return item.(analysis.TokenFilter), nil } func TokenFilterTypesAndInstances() ([]string, []string) { diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/token_maps.go 
b/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/token_maps.go index 8bcda16f9d0e11bb585b452e0f244c025f571e32..e2cfc1c09103504d4b6e7dd284f163e9e7b73d6c 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/token_maps.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/token_maps.go @@ -25,40 +25,47 @@ func RegisterTokenMap(name string, constructor TokenMapConstructor) { type TokenMapConstructor func(config map[string]interface{}, cache *Cache) (analysis.TokenMap, error) type TokenMapRegistry map[string]TokenMapConstructor -type TokenMapCache map[string]analysis.TokenMap -func (c TokenMapCache) TokenMapNamed(name string, cache *Cache) (analysis.TokenMap, error) { - tokenMap, cached := c[name] - if cached { - return tokenMap, nil +type TokenMapCache struct { + *ConcurrentCache +} + +func NewTokenMapCache() *TokenMapCache { + return &TokenMapCache{ + NewConcurrentCache(), } - tokenMapConstructor, registered := tokenMaps[name] +} + +func TokenMapBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) { + cons, registered := tokenMaps[name] if !registered { return nil, fmt.Errorf("no token map with name or type '%s' registered", name) } - tokenMap, err := tokenMapConstructor(nil, cache) + tokenMap, err := cons(config, cache) if err != nil { return nil, fmt.Errorf("error building token map: %v", err) } - c[name] = tokenMap return tokenMap, nil } -func (c TokenMapCache) DefineTokenMap(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.TokenMap, error) { - _, cached := c[name] - if cached { - return nil, fmt.Errorf("token map named '%s' already defined", name) - } - tokenMapConstructor, registered := tokenMaps[typ] - if !registered { - return nil, fmt.Errorf("no token map type '%s' registered", typ) +func (c *TokenMapCache) TokenMapNamed(name string, cache *Cache) (analysis.TokenMap, error) { + item, err := c.ItemNamed(name, cache, TokenMapBuild) + if err != nil { + 
return nil, err } - tokenMap, err := tokenMapConstructor(config, cache) + return item.(analysis.TokenMap), nil +} + +func (c *TokenMapCache) DefineTokenMap(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.TokenMap, error) { + item, err := c.DefineItem(name, typ, config, cache, TokenMapBuild) if err != nil { - return nil, fmt.Errorf("error building token map: %v", err) + if err == ErrAlreadyDefined { + return nil, fmt.Errorf("token map named '%s' already defined", name) + } else { + return nil, err + } } - c[name] = tokenMap - return tokenMap, nil + return item.(analysis.TokenMap), nil } func TokenMapTypesAndInstances() ([]string, []string) { diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/tokenizer.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/tokenizer.go index 1d40c4ecd64aa4ae7384f9009c4d04cbfd19d260..133bd78e1fc405a781f55b6db082df6191123c59 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/tokenizer.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/registry/tokenizer.go @@ -25,40 +25,47 @@ func RegisterTokenizer(name string, constructor TokenizerConstructor) { type TokenizerConstructor func(config map[string]interface{}, cache *Cache) (analysis.Tokenizer, error) type TokenizerRegistry map[string]TokenizerConstructor -type TokenizerCache map[string]analysis.Tokenizer -func (c TokenizerCache) TokenizerNamed(name string, cache *Cache) (analysis.Tokenizer, error) { - tokenizer, cached := c[name] - if cached { - return tokenizer, nil +type TokenizerCache struct { + *ConcurrentCache +} + +func NewTokenizerCache() *TokenizerCache { + return &TokenizerCache{ + NewConcurrentCache(), } - tokenizerConstructor, registered := tokenizers[name] +} + +func TokenizerBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) { + cons, registered := tokenizers[name] if !registered { return nil, fmt.Errorf("no tokenizer with name or type '%s' 
registered", name) } - tokenizer, err := tokenizerConstructor(nil, cache) + tokenizer, err := cons(config, cache) if err != nil { - return nil, fmt.Errorf("error building tokenizer '%s': %v", name, err) + return nil, fmt.Errorf("error building tokenizer: %v", err) } - c[name] = tokenizer return tokenizer, nil } -func (c TokenizerCache) DefineTokenizer(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.Tokenizer, error) { - _, cached := c[name] - if cached { - return nil, fmt.Errorf("tokenizer named '%s' already defined", name) - } - tokenizerConstructor, registered := tokenizers[typ] - if !registered { - return nil, fmt.Errorf("no tokenizer type '%s' registered", typ) +func (c *TokenizerCache) TokenizerNamed(name string, cache *Cache) (analysis.Tokenizer, error) { + item, err := c.ItemNamed(name, cache, TokenizerBuild) + if err != nil { + return nil, err } - tokenizer, err := tokenizerConstructor(config, cache) + return item.(analysis.Tokenizer), nil +} + +func (c *TokenizerCache) DefineTokenizer(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.Tokenizer, error) { + item, err := c.DefineItem(name, typ, config, cache, TokenizerBuild) if err != nil { - return nil, fmt.Errorf("error building tokenizer '%s': %v", name, err) + if err == ErrAlreadyDefined { + return nil, fmt.Errorf("tokenizer named '%s' already defined", name) + } else { + return nil, err + } } - c[name] = tokenizer - return tokenizer, nil + return item.(analysis.Tokenizer), nil } func TokenizerTypesAndInstances() ([]string, []string) { diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/search.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/search.go index fee1ad40f3134e922e4036ce912a0fd229261eb1..0709f3b9bccaee501bedafe72b389068c4f0bc6e 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/search.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/search.go @@ -182,7 +182,7 @@ func (r *SearchRequest) 
AddFacet(facetName string, f *FacetRequest) { func (r *SearchRequest) UnmarshalJSON(input []byte) error { var temp struct { Q json.RawMessage `json:"query"` - Size int `json:"size"` + Size *int `json:"size"` From int `json:"from"` Highlight *HighlightRequest `json:"highlight"` Fields []string `json:"fields"` @@ -195,7 +195,11 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error { return err } - r.Size = temp.Size + if temp.Size == nil { + r.Size = 10 + } else { + r.Size = *temp.Size + } r.From = temp.From r.Explain = temp.Explain r.Highlight = temp.Highlight @@ -236,9 +240,47 @@ func NewSearchRequestOptions(q Query, size, from int, explain bool) *SearchReque } } +// IndexErrMap tracks errors with the name of the index where it occurred +type IndexErrMap map[string]error + +// MarshalJSON seralizes the error into a string for JSON consumption +func (iem IndexErrMap) MarshalJSON() ([]byte, error) { + tmp := make(map[string]string, len(iem)) + for k, v := range iem { + tmp[k] = v.Error() + } + return json.Marshal(tmp) +} + +// SearchStatus is a secion in the SearchResult reporting how many +// underlying indexes were queried, how many were successful/failed +// and a map of any errors that were encountered +type SearchStatus struct { + Total int `json:"total"` + Failed int `json:"failed"` + Successful int `json:"successful"` + Errors IndexErrMap `json:"errors,omitempty"` +} + +// Merge will merge together multiple SearchStatuses during a MultiSearch +func (ss *SearchStatus) Merge(other *SearchStatus) { + ss.Total += other.Total + ss.Failed += other.Failed + ss.Successful += other.Successful + if len(other.Errors) > 0 { + if ss.Errors == nil { + ss.Errors = make(map[string]error) + } + for otherIndex, otherError := range other.Errors { + ss.Errors[otherIndex] = otherError + } + } +} + // A SearchResult describes the results of executing // a SearchRequest. 
type SearchResult struct { + Status *SearchStatus `json:"status"` Request *SearchRequest `json:"request"` Hits search.DocumentMatchCollection `json:"hits"` Total uint64 `json:"total_hits"` @@ -288,7 +330,9 @@ func (sr *SearchResult) String() string { return rv } +// Merge will merge together multiple SearchResults during a MultiSearch func (sr *SearchResult) Merge(other *SearchResult) { + sr.Status.Merge(other.Status) sr.Hits = append(sr.Hits, other.Hits...) sr.Total += other.Total if other.MaxScore > sr.MaxScore { diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/search/collector.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/search/collector.go index 86e901afb567296250a05d5aae8645373ae84883..773c8d554c9b39cf1d9b33d67988252844a8d6ea 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/search/collector.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/search/collector.go @@ -11,10 +11,12 @@ package search import ( "time" + + "golang.org/x/net/context" ) type Collector interface { - Collect(searcher Searcher) error + Collect(ctx context.Context, searcher Searcher) error Results() DocumentMatchCollection Total() uint64 MaxScore() float64 diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/search/collectors/collector_top_score.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/search/collectors/collector_top_score.go index b919815063306164f291d47f6f489174022e0a0a..84d6d1379b8a5fc7910d28b7b7a624aea717dc04 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/search/collectors/collector_top_score.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/search/collectors/collector_top_score.go @@ -13,6 +13,8 @@ import ( "container/list" "time" + "golang.org/x/net/context" + "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/search" ) @@ -54,18 +56,30 @@ func (tksc *TopScoreCollector) Took() time.Duration { return tksc.took } -func (tksc *TopScoreCollector) 
Collect(searcher search.Searcher) error { +func (tksc *TopScoreCollector) Collect(ctx context.Context, searcher search.Searcher) error { startTime := time.Now() - next, err := searcher.Next() + var err error + var next *search.DocumentMatch + select { + case <-ctx.Done(): + return ctx.Err() + default: + next, err = searcher.Next() + } for err == nil && next != nil { - tksc.collectSingle(next) - if tksc.facetsBuilder != nil { - err = tksc.facetsBuilder.Update(next) - if err != nil { - break + select { + case <-ctx.Done(): + return ctx.Err() + default: + tksc.collectSingle(next) + if tksc.facetsBuilder != nil { + err = tksc.facetsBuilder.Update(next) + if err != nil { + break + } } + next, err = searcher.Next() } - next, err = searcher.Next() } // compute search duration tksc.took = time.Since(startTime) diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/search/facets/facet_builder_datetime.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/search/facets/facet_builder_datetime.go index 5dfa598d1d2539f9349f3642dc520a33f25ddff0..b3e265f342b2c2beaaeb711d4b391d72936b105c 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/search/facets/facet_builder_datetime.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/search/facets/facet_builder_datetime.go @@ -10,7 +10,7 @@ package facets import ( - "container/list" + "sort" "time" "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index" @@ -90,11 +90,8 @@ func (fb *DateTimeFacetBuilder) Result() *search.FacetResult { Missing: fb.missing, } - // FIXME better implementation needed here this is quick and dirty - topN := list.New() + rv.DateRanges = make([]*search.DateRangeFacet, 0, len(fb.termsCount)) - // walk entries and find top N -OUTER: for term, count := range fb.termsCount { dateRange := fb.ranges[term] tf := &search.DateRangeFacet{ @@ -109,37 +106,19 @@ OUTER: end := dateRange.end.Format(time.RFC3339Nano) tf.End = &end } + rv.DateRanges = 
append(rv.DateRanges, tf) + } - for e := topN.Front(); e != nil; e = e.Next() { - curr := e.Value.(*search.DateRangeFacet) - if tf.Count < curr.Count { - - topN.InsertBefore(tf, e) - // if we just made the list too long - if topN.Len() > fb.size { - // remove the head - topN.Remove(topN.Front()) - } - continue OUTER - } - } - // if we got to the end, we still have to add it - topN.PushBack(tf) - if topN.Len() > fb.size { - // remove the head - topN.Remove(topN.Front()) - } + sort.Sort(rv.DateRanges) + // we now have the list of the top N facets + if fb.size < len(rv.DateRanges) { + rv.DateRanges = rv.DateRanges[:fb.size] } - // we now have the list of the top N facets - rv.DateRanges = make([]*search.DateRangeFacet, topN.Len()) - i := 0 notOther := 0 - for e := topN.Back(); e != nil; e = e.Prev() { - rv.DateRanges[i] = e.Value.(*search.DateRangeFacet) - i++ - notOther += e.Value.(*search.DateRangeFacet).Count + for _, nr := range rv.DateRanges { + notOther += nr.Count } rv.Other = fb.total - notOther diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/search/facets_builder.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/search/facets_builder.go index b511fda3edb8171a7a805ae1492f73b691904690..b6ecc8a6a04f85c16d0d4ea59c68ab0316e7ecb5 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/search/facets_builder.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/search/facets_builder.go @@ -66,9 +66,14 @@ func (tf TermFacets) Add(termFacet *TermFacet) TermFacets { return tf } -func (tf TermFacets) Len() int { return len(tf) } -func (tf TermFacets) Swap(i, j int) { tf[i], tf[j] = tf[j], tf[i] } -func (tf TermFacets) Less(i, j int) bool { return tf[i].Count > tf[j].Count } +func (tf TermFacets) Len() int { return len(tf) } +func (tf TermFacets) Swap(i, j int) { tf[i], tf[j] = tf[j], tf[i] } +func (tf TermFacets) Less(i, j int) bool { + if tf[i].Count == tf[j].Count { + return tf[i].Term < tf[j].Term + } + return tf[i].Count > 
tf[j].Count +} type NumericRangeFacet struct { Name string `json:"name"` @@ -91,9 +96,14 @@ func (nrf NumericRangeFacets) Add(numericRangeFacet *NumericRangeFacet) NumericR return nrf } -func (nrf NumericRangeFacets) Len() int { return len(nrf) } -func (nrf NumericRangeFacets) Swap(i, j int) { nrf[i], nrf[j] = nrf[j], nrf[i] } -func (nrf NumericRangeFacets) Less(i, j int) bool { return nrf[i].Count > nrf[j].Count } +func (nrf NumericRangeFacets) Len() int { return len(nrf) } +func (nrf NumericRangeFacets) Swap(i, j int) { nrf[i], nrf[j] = nrf[j], nrf[i] } +func (nrf NumericRangeFacets) Less(i, j int) bool { + if nrf[i].Count == nrf[j].Count { + return nrf[i].Name < nrf[j].Name + } + return nrf[i].Count > nrf[j].Count +} type DateRangeFacet struct { Name string `json:"name"` @@ -102,11 +112,34 @@ type DateRangeFacet struct { Count int `json:"count"` } +func (drf *DateRangeFacet) Same(other *DateRangeFacet) bool { + if drf.Start == nil && other.Start != nil { + return false + } + if drf.Start != nil && other.Start == nil { + return false + } + if drf.Start != nil && other.Start != nil && *drf.Start != *other.Start { + return false + } + if drf.End == nil && other.End != nil { + return false + } + if drf.End != nil && other.End == nil { + return false + } + if drf.End != nil && other.End != nil && *drf.End != *other.End { + return false + } + + return true +} + type DateRangeFacets []*DateRangeFacet func (drf DateRangeFacets) Add(dateRangeFacet *DateRangeFacet) DateRangeFacets { for _, existingDr := range drf { - if dateRangeFacet.Start == existingDr.Start && dateRangeFacet.End == existingDr.End { + if dateRangeFacet.Same(existingDr) { existingDr.Count += dateRangeFacet.Count return drf } @@ -116,9 +149,14 @@ func (drf DateRangeFacets) Add(dateRangeFacet *DateRangeFacet) DateRangeFacets { return drf } -func (drf DateRangeFacets) Len() int { return len(drf) } -func (drf DateRangeFacets) Swap(i, j int) { drf[i], drf[j] = drf[j], drf[i] } -func (drf DateRangeFacets) 
Less(i, j int) bool { return drf[i].Count > drf[j].Count } +func (drf DateRangeFacets) Len() int { return len(drf) } +func (drf DateRangeFacets) Swap(i, j int) { drf[i], drf[j] = drf[j], drf[i] } +func (drf DateRangeFacets) Less(i, j int) bool { + if drf[i].Count == drf[j].Count { + return drf[i].Name < drf[j].Name + } + return drf[i].Count > drf[j].Count +} type FacetResult struct { Field string `json:"field"` diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/search/highlight/fragment_formatters/ansi/fragment_formatter_ansi.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/search/highlight/fragment_formatters/ansi/fragment_formatter_ansi.go index 749f3b5f307dc332cff2aa3b463de5df7d15e735..9f874712fc3a42c9595d7dbcda1615cf7050b359 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/search/highlight/fragment_formatters/ansi/fragment_formatter_ansi.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/search/highlight/fragment_formatters/ansi/fragment_formatter_ansi.go @@ -35,6 +35,10 @@ func (a *FragmentFormatter) Format(f *highlight.Fragment, orderedTermLocations h if termLocation == nil { continue } + // make sure the array positions match + if !highlight.SameArrayPositions(f.ArrayPositions, termLocation.ArrayPositions) { + continue + } if termLocation.Start < curr { continue } diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/search/highlight/fragment_formatters/html/fragment_formatter_html.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/search/highlight/fragment_formatters/html/fragment_formatter_html.go index 6488e248d04cafa0c7bbab6ed480bf84aa520efd..2a5b0f22f213da644b22d55ce00106def091d46e 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/search/highlight/fragment_formatters/html/fragment_formatter_html.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/search/highlight/fragment_formatters/html/fragment_formatter_html.go @@ -38,6 +38,10 @@ func (a *FragmentFormatter) Format(f 
*highlight.Fragment, orderedTermLocations h if termLocation == nil { continue } + // make sure the array positions match + if !highlight.SameArrayPositions(f.ArrayPositions, termLocation.ArrayPositions) { + continue + } if termLocation.Start < curr { continue } diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/search/highlight/highlighters/simple/fragment_scorer_simple.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/search/highlight/highlighters/simple/fragment_scorer_simple.go index ccc93d006e890b28f6429de99aca67753f1325a7..793a905513cd04185eee8cbca4cba1e6dbc52cae 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/search/highlight/highlighters/simple/fragment_scorer_simple.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/search/highlight/highlighters/simple/fragment_scorer_simple.go @@ -32,7 +32,7 @@ func (s *FragmentScorer) Score(f *highlight.Fragment) { OUTER: for _, locations := range s.tlm { for _, location := range locations { - if sameArrayPositions(f.ArrayPositions, location.ArrayPositions) && int(location.Start) >= f.Start && int(location.End) <= f.End { + if highlight.SameArrayPositions(f.ArrayPositions, location.ArrayPositions) && int(location.Start) >= f.Start && int(location.End) <= f.End { score += 1.0 // once we find a term in the fragment // don't care about additional matches diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/search/highlight/highlighters/simple/highlighter_simple.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/search/highlight/highlighters/simple/highlighter_simple.go index a4d80ad9a8ac6818c83cd14dc5c956e7ae3583a4..0312f3a96412283e0eec9a9a0c99c2d8840d1635 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/search/highlight/highlighters/simple/highlighter_simple.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/search/highlight/highlighters/simple/highlighter_simple.go @@ -80,10 +80,9 @@ func (s *Highlighter) BestFragmentsInField(dm 
*search.DocumentMatch, doc *docume if f.Name() == field { _, ok := f.(*document.TextField) if ok { - termLocationsSameArrayPosition := make(highlight.TermLocations, 0) for _, otl := range orderedTermLocations { - if sameArrayPositions(f.ArrayPositions(), otl.ArrayPositions) { + if highlight.SameArrayPositions(f.ArrayPositions(), otl.ArrayPositions) { termLocationsSameArrayPosition = append(termLocationsSameArrayPosition, otl) } } @@ -152,18 +151,6 @@ func (s *Highlighter) BestFragmentsInField(dm *search.DocumentMatch, doc *docume return formattedFragments } -func sameArrayPositions(fieldArrayPositions []uint64, termLocationArrayPositions []float64) bool { - if len(fieldArrayPositions) != len(termLocationArrayPositions) { - return false - } - for i := 0; i < len(fieldArrayPositions); i++ { - if fieldArrayPositions[i] != uint64(termLocationArrayPositions[i]) { - return false - } - } - return true -} - // FragmentQueue implements heap.Interface and holds Items. type FragmentQueue []*highlight.Fragment diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/search/highlight/term_locations.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/search/highlight/term_locations.go index a99277bef99aa29c81f7bcd3ec325dc7fc323f54..e0f92d38eed1734bccc661e6d12ae1b7d5e4adcc 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/search/highlight/term_locations.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/search/highlight/term_locations.go @@ -98,3 +98,15 @@ func OrderTermLocations(tlm search.TermLocationMap) TermLocations { sort.Sort(rv) return rv } + +func SameArrayPositions(fieldArrayPositions []uint64, termLocationArrayPositions []float64) bool { + if len(fieldArrayPositions) != len(termLocationArrayPositions) { + return false + } + for i := 0; i < len(fieldArrayPositions); i++ { + if fieldArrayPositions[i] != uint64(termLocationArrayPositions[i]) { + return false + } + } + return true +} diff --git 
a/Godeps/_workspace/src/github.com/blevesearch/bleve/search/searchers/search_fuzzy.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/search/searchers/search_fuzzy.go index aa865e6dd2c2e2a43df536f686d1777d2b775fec..9a6808d1f5e6a41dba8c759154bffdbe1838aab5 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/search/searchers/search_fuzzy.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/search/searchers/search_fuzzy.go @@ -55,6 +55,11 @@ func NewFuzzySearcher(indexReader index.IndexReader, term string, prefix, fuzzin return nil, err } + err = fieldDict.Close() + if err != nil { + return nil, err + } + // enumerate all the terms in the range qsearchers := make([]search.Searcher, 0, 25) diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/search/searchers/search_regexp.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/search/searchers/search_regexp.go index 51a195950136ccf141e8b5a73e3fb62ef1472f1b..681ace28f0cf04ac314490c3bfb71b3a011d72a3 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/search/searchers/search_regexp.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/search/searchers/search_regexp.go @@ -51,6 +51,11 @@ func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, fi if err != nil { return nil, err } + + err = fieldDict.Close() + if err != nil { + return nil, err + } } // enumerate all the terms in the range diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/search/searchers/search_term_prefix.go b/Godeps/_workspace/src/github.com/blevesearch/bleve/search/searchers/search_term_prefix.go index b1a325b8aef4bd1a156c2a7026c3d4f223cdc7b6..b04e799794ca003df72cae5341928aa24824a4e4 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/search/searchers/search_term_prefix.go +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/search/searchers/search_term_prefix.go @@ -37,6 +37,12 @@ func NewTermPrefixSearcher(indexReader index.IndexReader, prefix 
string, field s qsearchers = append(qsearchers, qsearcher) tfd, err = fieldDict.Next() } + + err = fieldDict.Close() + if err != nil { + return nil, err + } + // build disjunction searcher of these ranges searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain) if err != nil { diff --git a/Godeps/_workspace/src/github.com/blevesearch/bleve/test/tests/basic/searches.json b/Godeps/_workspace/src/github.com/blevesearch/bleve/test/tests/basic/searches.json index dd44a86aeb5956ab631aacd5485d27655b6480dd..c00b7493729a791c5fce78bdeace0f45c3007960 100644 --- a/Godeps/_workspace/src/github.com/blevesearch/bleve/test/tests/basic/searches.json +++ b/Godeps/_workspace/src/github.com/blevesearch/bleve/test/tests/basic/searches.json @@ -617,6 +617,25 @@ "hits": [] } }, + { + "comment": "test wildcard matching term", + "search": { + "from": 0, + "size": 10, + "query": { + "field": "name", + "wildcard": "mar*" + } + }, + "result": { + "total_hits": 1, + "hits": [ + { + "id": "a" + } + ] + } + }, { "comment": "test boost - term query", "search": { diff --git a/Godeps/_workspace/src/github.com/boltdb/bolt/README.md b/Godeps/_workspace/src/github.com/boltdb/bolt/README.md index 1728d65be2e3f4cbfe029d62e28534a0b26214af..4c9312c08c5bcff0e6f0e11a6c1ba707d633500e 100644 --- a/Godeps/_workspace/src/github.com/boltdb/bolt/README.md +++ b/Godeps/_workspace/src/github.com/boltdb/bolt/README.md @@ -1,4 +1,4 @@ -Bolt [](https://drone.io/github.com/boltdb/bolt/latest) [](https://coveralls.io/r/boltdb/bolt?branch=master) [](https://godoc.org/github.com/boltdb/bolt)  +Bolt [](https://coveralls.io/r/boltdb/bolt?branch=master) [](https://godoc.org/github.com/boltdb/bolt)  ==== Bolt is a pure Go key/value store inspired by [Howard Chu's][hyc_symas] @@ -427,6 +427,8 @@ db.View(func(tx *bolt.Tx) error { }) ``` +Note that, while RFC3339 is sortable, the Golang implementation of RFC3339Nano does not use a fixed number of digits after the decimal point and is therefore not sortable. 
+ #### ForEach() @@ -437,7 +439,7 @@ all the keys in a bucket: db.View(func(tx *bolt.Tx) error { // Assume bucket exists and has keys b := tx.Bucket([]byte("MyBucket")) - + b.ForEach(func(k, v []byte) error { fmt.Printf("key=%s, value=%s\n", k, v) return nil @@ -617,7 +619,7 @@ Boltmobiledemo.BoltDB boltDB = Boltmobiledemo.NewBoltDB(path) { NSURL* URL= [NSURL fileURLWithPath: filePathString]; assert([[NSFileManager defaultManager] fileExistsAtPath: [URL path]]); - + NSError *error = nil; BOOL success = [URL setResourceValue: [NSNumber numberWithBool: YES] forKey: NSURLIsExcludedFromBackupKey error: &error]; @@ -835,7 +837,12 @@ Below is a list of public, open source projects that use Bolt: backed by boltdb. * [buckets](https://github.com/joyrexus/buckets) - a bolt wrapper streamlining simple tx and key scans. +* [mbuckets](https://github.com/abhigupta912/mbuckets) - A Bolt wrapper that allows easy operations on multi level (nested) buckets. * [Request Baskets](https://github.com/darklynx/request-baskets) - A web service to collect arbitrary HTTP requests and inspect them via REST API or simple web UI, similar to [RequestBin](http://requestb.in/) service * [Go Report Card](https://goreportcard.com/) - Go code quality report cards as a (free and open source) service. +* [Boltdb Boilerplate](https://github.com/bobintornado/boltdb-boilerplate) - Boilerplate wrapper around bolt aiming to make simple calls one-liners. +* [lru](https://github.com/crowdriff/lru) - Easy to use Bolt-backed Least-Recently-Used (LRU) read-through cache with chainable remote stores. +* [Storm](https://github.com/asdine/storm) - A simple ORM around BoltDB. +* [GoWebApp](https://github.com/josephspurrier/gowebapp) - A basic MVC web application in Go using BoltDB. If you are using Bolt in a project please send a pull request to add it to the list. 
diff --git a/Godeps/_workspace/src/github.com/boltdb/bolt/appveyor.yml b/Godeps/_workspace/src/github.com/boltdb/bolt/appveyor.yml new file mode 100644 index 0000000000000000000000000000000000000000..6e26e941d682d0bae5f16172998befeb59a9c56c --- /dev/null +++ b/Godeps/_workspace/src/github.com/boltdb/bolt/appveyor.yml @@ -0,0 +1,18 @@ +version: "{build}" + +os: Windows Server 2012 R2 + +clone_folder: c:\gopath\src\github.com\boltdb\bolt + +environment: + GOPATH: c:\gopath + +install: + - echo %PATH% + - echo %GOPATH% + - go version + - go env + - go get -v -t ./... + +build_script: + - go test -v ./... diff --git a/Godeps/_workspace/src/github.com/boltdb/bolt/bolt_ppc.go b/Godeps/_workspace/src/github.com/boltdb/bolt/bolt_ppc.go new file mode 100644 index 0000000000000000000000000000000000000000..645ddc3edc2d69ee331b6a3bca33443cdf35e05b --- /dev/null +++ b/Godeps/_workspace/src/github.com/boltdb/bolt/bolt_ppc.go @@ -0,0 +1,9 @@ +// +build ppc + +package bolt + +// maxMapSize represents the largest mmap size supported by Bolt. +const maxMapSize = 0x7FFFFFFF // 2GB + +// maxAllocSize is the size used when creating array pointers. +const maxAllocSize = 0xFFFFFFF diff --git a/Godeps/_workspace/src/github.com/boltdb/bolt/bolt_ppc64.go b/Godeps/_workspace/src/github.com/boltdb/bolt/bolt_ppc64.go new file mode 100644 index 0000000000000000000000000000000000000000..2dc6be02e3e3e24ee77cb018cad04f5804963c2a --- /dev/null +++ b/Godeps/_workspace/src/github.com/boltdb/bolt/bolt_ppc64.go @@ -0,0 +1,9 @@ +// +build ppc64 + +package bolt + +// maxMapSize represents the largest mmap size supported by Bolt. +const maxMapSize = 0xFFFFFFFFFFFF // 256TB + +// maxAllocSize is the size used when creating array pointers. 
+const maxAllocSize = 0x7FFFFFFF diff --git a/Godeps/_workspace/src/github.com/boltdb/bolt/bolt_unix.go b/Godeps/_workspace/src/github.com/boltdb/bolt/bolt_unix.go index 4b0723aac23701172fe569fb57f0c9e0cb46ede5..cad62dda1e38797eb9dd1a47953b5d21d479bd12 100644 --- a/Godeps/_workspace/src/github.com/boltdb/bolt/bolt_unix.go +++ b/Godeps/_workspace/src/github.com/boltdb/bolt/bolt_unix.go @@ -11,7 +11,7 @@ import ( ) // flock acquires an advisory lock on a file descriptor. -func flock(f *os.File, exclusive bool, timeout time.Duration) error { +func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error { var t time.Time for { // If we're beyond our timeout then return an error. @@ -27,7 +27,7 @@ func flock(f *os.File, exclusive bool, timeout time.Duration) error { } // Otherwise attempt to obtain an exclusive lock. - err := syscall.Flock(int(f.Fd()), flag|syscall.LOCK_NB) + err := syscall.Flock(int(db.file.Fd()), flag|syscall.LOCK_NB) if err == nil { return nil } else if err != syscall.EWOULDBLOCK { @@ -40,8 +40,8 @@ func flock(f *os.File, exclusive bool, timeout time.Duration) error { } // funlock releases an advisory lock on a file descriptor. -func funlock(f *os.File) error { - return syscall.Flock(int(f.Fd()), syscall.LOCK_UN) +func funlock(db *DB) error { + return syscall.Flock(int(db.file.Fd()), syscall.LOCK_UN) } // mmap memory maps a DB's data file. diff --git a/Godeps/_workspace/src/github.com/boltdb/bolt/bolt_unix_solaris.go b/Godeps/_workspace/src/github.com/boltdb/bolt/bolt_unix_solaris.go index 1c4e48d63a021f5a8ebf61dc9be1c4bc9c70d912..307bf2b3ee976cb1741090a0283a4be1b52c133f 100644 --- a/Godeps/_workspace/src/github.com/boltdb/bolt/bolt_unix_solaris.go +++ b/Godeps/_workspace/src/github.com/boltdb/bolt/bolt_unix_solaris.go @@ -11,7 +11,7 @@ import ( ) // flock acquires an advisory lock on a file descriptor. 
-func flock(f *os.File, exclusive bool, timeout time.Duration) error { +func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error { var t time.Time for { // If we're beyond our timeout then return an error. @@ -32,7 +32,7 @@ func flock(f *os.File, exclusive bool, timeout time.Duration) error { } else { lock.Type = syscall.F_RDLCK } - err := syscall.FcntlFlock(f.Fd(), syscall.F_SETLK, &lock) + err := syscall.FcntlFlock(db.file.Fd(), syscall.F_SETLK, &lock) if err == nil { return nil } else if err != syscall.EAGAIN { @@ -45,13 +45,13 @@ func flock(f *os.File, exclusive bool, timeout time.Duration) error { } // funlock releases an advisory lock on a file descriptor. -func funlock(f *os.File) error { +func funlock(db *DB) error { var lock syscall.Flock_t lock.Start = 0 lock.Len = 0 lock.Type = syscall.F_UNLCK lock.Whence = 0 - return syscall.FcntlFlock(uintptr(f.Fd()), syscall.F_SETLK, &lock) + return syscall.FcntlFlock(uintptr(db.file.Fd()), syscall.F_SETLK, &lock) } // mmap memory maps a DB's data file. diff --git a/Godeps/_workspace/src/github.com/boltdb/bolt/bolt_windows.go b/Godeps/_workspace/src/github.com/boltdb/bolt/bolt_windows.go index 91c4968f6a179efd6cf769e1c899c7566ba1e381..d538e6afd77ac8e6000318df7aead424e6c4f5e3 100644 --- a/Godeps/_workspace/src/github.com/boltdb/bolt/bolt_windows.go +++ b/Godeps/_workspace/src/github.com/boltdb/bolt/bolt_windows.go @@ -16,6 +16,8 @@ var ( ) const ( + lockExt = ".lock" + // see https://msdn.microsoft.com/en-us/library/windows/desktop/aa365203(v=vs.85).aspx flagLockExclusive = 2 flagLockFailImmediately = 1 @@ -46,7 +48,16 @@ func fdatasync(db *DB) error { } // flock acquires an advisory lock on a file descriptor. -func flock(f *os.File, exclusive bool, timeout time.Duration) error { +func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error { + // Create a separate lock file on windows because a process + // cannot share an exclusive lock on the same file. 
This is + // needed during Tx.WriteTo(). + f, err := os.OpenFile(db.path+lockExt, os.O_CREATE, mode) + if err != nil { + return err + } + db.lockfile = f + var t time.Time for { // If we're beyond our timeout then return an error. @@ -62,7 +73,7 @@ func flock(f *os.File, exclusive bool, timeout time.Duration) error { flag |= flagLockExclusive } - err := lockFileEx(syscall.Handle(f.Fd()), flag, 0, 1, 0, &syscall.Overlapped{}) + err := lockFileEx(syscall.Handle(db.lockfile.Fd()), flag, 0, 1, 0, &syscall.Overlapped{}) if err == nil { return nil } else if err != errLockViolation { @@ -75,8 +86,11 @@ func flock(f *os.File, exclusive bool, timeout time.Duration) error { } // funlock releases an advisory lock on a file descriptor. -func funlock(f *os.File) error { - return unlockFileEx(syscall.Handle(f.Fd()), 0, 1, 0, &syscall.Overlapped{}) +func funlock(db *DB) error { + err := unlockFileEx(syscall.Handle(db.lockfile.Fd()), 0, 1, 0, &syscall.Overlapped{}) + db.lockfile.Close() + os.Remove(db.path+lockExt) + return err } // mmap memory maps a DB's data file. diff --git a/Godeps/_workspace/src/github.com/boltdb/bolt/db.go b/Godeps/_workspace/src/github.com/boltdb/bolt/db.go index 0f1e1bc3d74a209ef955e1504346a69d60bf8cc6..501d36aac24a20e084d9d5c7fe53c0e8defc523f 100644 --- a/Godeps/_workspace/src/github.com/boltdb/bolt/db.go +++ b/Godeps/_workspace/src/github.com/boltdb/bolt/db.go @@ -93,6 +93,7 @@ type DB struct { path string file *os.File + lockfile *os.File // windows only dataref []byte // mmap'ed readonly, write throws SEGV data *[maxMapSize]byte datasz int @@ -177,7 +178,7 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) { // if !options.ReadOnly. // The database file is locked using the shared lock (more than one process may // hold a lock at the same time) otherwise (options.ReadOnly is set). 
- if err := flock(db.file, !db.readOnly, options.Timeout); err != nil { + if err := flock(db, mode, !db.readOnly, options.Timeout); err != nil { _ = db.close() return nil, err } @@ -379,6 +380,10 @@ func (db *DB) Close() error { } func (db *DB) close() error { + if !db.opened { + return nil + } + db.opened = false db.freelist = nil @@ -397,7 +402,7 @@ func (db *DB) close() error { // No need to unlock read-only file. if !db.readOnly { // Unlock the file. - if err := funlock(db.file); err != nil { + if err := funlock(db); err != nil { log.Printf("bolt.Close(): funlock error: %s", err) } } @@ -824,8 +829,10 @@ func (db *DB) grow(sz int) error { // Truncate and fsync to ensure file size metadata is flushed. // https://github.com/boltdb/bolt/issues/284 if !db.NoGrowSync && !db.readOnly { - if err := db.file.Truncate(int64(sz)); err != nil { - return fmt.Errorf("file resize error: %s", err) + if runtime.GOOS != "windows" { + if err := db.file.Truncate(int64(sz)); err != nil { + return fmt.Errorf("file resize error: %s", err) + } } if err := db.file.Sync(); err != nil { return fmt.Errorf("file sync error: %s", err) diff --git a/Godeps/_workspace/src/github.com/boltdb/bolt/node.go b/Godeps/_workspace/src/github.com/boltdb/bolt/node.go index c9fb21c73149f6f9e1d03fc5b6b9fc46d8d6d9ea..e9d64af81e3ac198faa841b9272cb0c77990a39e 100644 --- a/Godeps/_workspace/src/github.com/boltdb/bolt/node.go +++ b/Godeps/_workspace/src/github.com/boltdb/bolt/node.go @@ -463,43 +463,6 @@ func (n *node) rebalance() { target = n.prevSibling() } - // If target node has extra nodes then just move one over. - if target.numChildren() > target.minKeys() { - if useNextSibling { - // Reparent and move node. - if child, ok := n.bucket.nodes[target.inodes[0].pgid]; ok { - child.parent.removeChild(child) - child.parent = n - child.parent.children = append(child.parent.children, child) - } - n.inodes = append(n.inodes, target.inodes[0]) - target.inodes = target.inodes[1:] - - // Update target key on parent. 
- target.parent.put(target.key, target.inodes[0].key, nil, target.pgid, 0) - target.key = target.inodes[0].key - _assert(len(target.key) > 0, "rebalance(1): zero-length node key") - } else { - // Reparent and move node. - if child, ok := n.bucket.nodes[target.inodes[len(target.inodes)-1].pgid]; ok { - child.parent.removeChild(child) - child.parent = n - child.parent.children = append(child.parent.children, child) - } - n.inodes = append(n.inodes, inode{}) - copy(n.inodes[1:], n.inodes) - n.inodes[0] = target.inodes[len(target.inodes)-1] - target.inodes = target.inodes[:len(target.inodes)-1] - } - - // Update parent key for node. - n.parent.put(n.key, n.inodes[0].key, nil, n.pgid, 0) - n.key = n.inodes[0].key - _assert(len(n.key) > 0, "rebalance(2): zero-length node key") - - return - } - // If both this node and the target node are too small then merge them. if useNextSibling { // Reparent all child nodes being moved. diff --git a/Godeps/_workspace/src/github.com/boltdb/bolt/page.go b/Godeps/_workspace/src/github.com/boltdb/bolt/page.go index 818aa1b1531deee61e3f01b9184da88ab5058c20..4a555286a384b41911af34b769cbe8aac2b4b955 100644 --- a/Godeps/_workspace/src/github.com/boltdb/bolt/page.go +++ b/Godeps/_workspace/src/github.com/boltdb/bolt/page.go @@ -111,13 +111,13 @@ type leafPageElement struct { // key returns a byte slice of the node key. func (n *leafPageElement) key() []byte { buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) - return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize] + return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize:n.ksize] } // value returns a byte slice of the node value. func (n *leafPageElement) value() []byte { buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) - return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos+n.ksize]))[:n.vsize] + return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos+n.ksize]))[:n.vsize:n.vsize] } // PageInfo represents human readable information about a page. 
diff --git a/Godeps/_workspace/src/github.com/boltdb/bolt/tx.go b/Godeps/_workspace/src/github.com/boltdb/bolt/tx.go index e74d2cae76045e319654d7e3a231c095b6a47667..b8510fdb87bf32713aa7b828c0c597bea171571d 100644 --- a/Godeps/_workspace/src/github.com/boltdb/bolt/tx.go +++ b/Godeps/_workspace/src/github.com/boltdb/bolt/tx.go @@ -5,6 +5,7 @@ import ( "io" "os" "sort" + "strings" "time" "unsafe" ) @@ -202,8 +203,17 @@ func (tx *Tx) Commit() error { // If strict mode is enabled then perform a consistency check. // Only the first consistency error is reported in the panic. if tx.db.StrictMode { - if err, ok := <-tx.Check(); ok { - panic("check fail: " + err.Error()) + ch := tx.Check() + var errs []string + for { + err, ok := <-ch + if !ok { + break + } + errs = append(errs, err.Error()) + } + if len(errs) > 0 { + panic("check fail: " + strings.Join(errs, "\n")) } } @@ -297,12 +307,34 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) { } defer func() { _ = f.Close() }() - // Copy the meta pages. - tx.db.metalock.Lock() - n, err = io.CopyN(w, f, int64(tx.db.pageSize*2)) - tx.db.metalock.Unlock() + // Generate a meta page. We use the same page data for both meta pages. + buf := make([]byte, tx.db.pageSize) + page := (*page)(unsafe.Pointer(&buf[0])) + page.flags = metaPageFlag + *page.meta() = *tx.meta + + // Write meta 0. + page.id = 0 + page.meta().checksum = page.meta().sum64() + nn, err := w.Write(buf) + n += int64(nn) + if err != nil { + return n, fmt.Errorf("meta 0 copy: %s", err) + } + + // Write meta 1 with a lower transaction id. + page.id = 1 + page.meta().txid -= 1 + page.meta().checksum = page.meta().sum64() + nn, err = w.Write(buf) + n += int64(nn) if err != nil { - return n, fmt.Errorf("meta copy: %s", err) + return n, fmt.Errorf("meta 1 copy: %s", err) + } + + // Move past the meta pages in the file. + if _, err := f.Seek(int64(tx.db.pageSize*2), os.SEEK_SET); err != nil { + return n, fmt.Errorf("seek: %s", err) } // Copy data pages. 
diff --git a/Godeps/_workspace/src/github.com/golang/snappy/.gitignore b/Godeps/_workspace/src/github.com/golang/snappy/.gitignore index 1b4ec9304aab6ae5a1b81748a0091a42e17bb504..042091d9b3b0d93b7070e05e11a35b4131c826f7 100644 --- a/Godeps/_workspace/src/github.com/golang/snappy/.gitignore +++ b/Godeps/_workspace/src/github.com/golang/snappy/.gitignore @@ -1,3 +1,8 @@ +cmd/snappytool/snappytool +testdata/bench + +# These explicitly listed benchmark data files are for an obsolete version of +# snappy_test.go. testdata/alice29.txt testdata/asyoulik.txt testdata/fireworks.jpeg diff --git a/Godeps/_workspace/src/github.com/golang/snappy/cmd/snappytool/main.cpp b/Godeps/_workspace/src/github.com/golang/snappy/cmd/snappytool/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..db28a899e261e4f64fb0fecda03ac50425b25fd8 --- /dev/null +++ b/Godeps/_workspace/src/github.com/golang/snappy/cmd/snappytool/main.cpp @@ -0,0 +1,74 @@ +/* +To build the snappytool binary: +g++ main.cpp /usr/lib/libsnappy.a -o snappytool +*/ + +#include <errno.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +#include "snappy.h" + +#define N 1000000 + +char dst[N]; +char src[N]; + +int main(int argc, char** argv) { + // Parse args. + if (argc != 2) { + fprintf(stderr, "exactly one of -d or -e must be given\n"); + return 1; + } + bool decode = strcmp(argv[1], "-d") == 0; + bool encode = strcmp(argv[1], "-e") == 0; + if (decode == encode) { + fprintf(stderr, "exactly one of -d or -e must be given\n"); + return 1; + } + + // Read all of stdin into src[:s]. + size_t s = 0; + while (1) { + if (s == N) { + fprintf(stderr, "input too large\n"); + return 1; + } + ssize_t n = read(0, src+s, N-s); + if (n == 0) { + break; + } + if (n < 0) { + fprintf(stderr, "read error: %s\n", strerror(errno)); + // TODO: handle EAGAIN, EINTR? + return 1; + } + s += n; + } + + // Encode or decode src[:s] to dst[:d], and write to stdout. 
+ size_t d = 0; + if (encode) { + if (N < snappy::MaxCompressedLength(s)) { + fprintf(stderr, "input too large after encoding\n"); + return 1; + } + snappy::RawCompress(src, s, dst, &d); + } else { + if (!snappy::GetUncompressedLength(src, s, &d)) { + fprintf(stderr, "could not get uncompressed length\n"); + return 1; + } + if (N < d) { + fprintf(stderr, "input too large after decoding\n"); + return 1; + } + if (!snappy::RawUncompress(src, s, dst)) { + fprintf(stderr, "input was not valid Snappy-compressed data\n"); + return 1; + } + } + write(1, dst, d); + return 0; +} diff --git a/Godeps/_workspace/src/github.com/golang/snappy/decode.go b/Godeps/_workspace/src/github.com/golang/snappy/decode.go index 6c5dd66bf03e6a4b680752b10ff8e79837a12cf7..7be590cee7bd17360caba9f51d0ee8495e49154d 100644 --- a/Godeps/_workspace/src/github.com/golang/snappy/decode.go +++ b/Godeps/_workspace/src/github.com/golang/snappy/decode.go @@ -43,95 +43,36 @@ func decodedLen(src []byte) (blockLen, headerLen int, err error) { return int(v), n, nil } +const ( + decodeErrCodeCorrupt = 1 + decodeErrCodeUnsupportedLiteralLength = 2 + decodeErrCodeUnsupportedCopy4Tag = 3 +) + // Decode returns the decoded form of src. The returned slice may be a sub- // slice of dst if dst was large enough to hold the entire decoded block. // Otherwise, a newly allocated slice will be returned. -// It is valid to pass a nil dst. +// +// The dst and src must not overlap. It is valid to pass a nil dst. func Decode(dst, src []byte) ([]byte, error) { dLen, s, err := decodedLen(src) if err != nil { return nil, err } - if len(dst) < dLen { + if dLen <= len(dst) { + dst = dst[:dLen] + } else { dst = make([]byte, dLen) } - - var d, offset, length int - for s < len(src) { - switch src[s] & 0x03 { - case tagLiteral: - x := uint(src[s] >> 2) - switch { - case x < 60: - s++ - case x == 60: - s += 2 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. 
- return nil, ErrCorrupt - } - x = uint(src[s-1]) - case x == 61: - s += 3 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return nil, ErrCorrupt - } - x = uint(src[s-2]) | uint(src[s-1])<<8 - case x == 62: - s += 4 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return nil, ErrCorrupt - } - x = uint(src[s-3]) | uint(src[s-2])<<8 | uint(src[s-1])<<16 - case x == 63: - s += 5 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return nil, ErrCorrupt - } - x = uint(src[s-4]) | uint(src[s-3])<<8 | uint(src[s-2])<<16 | uint(src[s-1])<<24 - } - length = int(x + 1) - if length <= 0 { - return nil, errUnsupportedLiteralLength - } - if length > len(dst)-d || length > len(src)-s { - return nil, ErrCorrupt - } - copy(dst[d:], src[s:s+length]) - d += length - s += length - continue - - case tagCopy1: - s += 2 - if s > len(src) { - return nil, ErrCorrupt - } - length = 4 + int(src[s-2])>>2&0x7 - offset = int(src[s-2])&0xe0<<3 | int(src[s-1]) - - case tagCopy2: - s += 3 - if s > len(src) { - return nil, ErrCorrupt - } - length = 1 + int(src[s-3])>>2 - offset = int(src[s-2]) | int(src[s-1])<<8 - - case tagCopy4: - return nil, errUnsupportedCopy4Tag - } - - if offset <= 0 || d < offset || length > len(dst)-d { - return nil, ErrCorrupt - } - for end := d + length; d != end; d++ { - dst[d] = dst[d-offset] - } - } - if d != dLen { - return nil, ErrCorrupt + switch decode(dst, src[s:]) { + case 0: + return dst, nil + case decodeErrCodeUnsupportedLiteralLength: + return nil, errUnsupportedLiteralLength + case decodeErrCodeUnsupportedCopy4Tag: + return nil, errUnsupportedCopy4Tag } - return dst[:d], nil + return nil, ErrCorrupt } // NewReader returns a new Reader that decompresses from r, using the framing @@ -140,8 +81,8 @@ func Decode(dst, src []byte) ([]byte, error) { func NewReader(r io.Reader) *Reader { return &Reader{ r: r, - decoded: 
make([]byte, maxUncompressedChunkLen), - buf: make([]byte, maxEncodedLenOfMaxUncompressedChunkLen+checksumSize), + decoded: make([]byte, maxBlockSize), + buf: make([]byte, maxEncodedLenOfMaxBlockSize+checksumSize), } } diff --git a/Godeps/_workspace/src/github.com/golang/snappy/decode_amd64.go b/Godeps/_workspace/src/github.com/golang/snappy/decode_amd64.go new file mode 100644 index 0000000000000000000000000000000000000000..32bce470931c98ba7655b051bd7c33c25e2a8d83 --- /dev/null +++ b/Godeps/_workspace/src/github.com/golang/snappy/decode_amd64.go @@ -0,0 +1,10 @@ +// Copyright 2016 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package snappy + +// decode has the same semantics as in decode_other.go. +// +//go:noescape +func decode(dst, src []byte) int diff --git a/Godeps/_workspace/src/github.com/golang/snappy/decode_amd64.s b/Godeps/_workspace/src/github.com/golang/snappy/decode_amd64.s new file mode 100644 index 0000000000000000000000000000000000000000..c33f5bf97b33bafb6a8ab20fa231cc846b105c7a --- /dev/null +++ b/Godeps/_workspace/src/github.com/golang/snappy/decode_amd64.s @@ -0,0 +1,472 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "textflag.h" + +// func decode(dst, src []byte) int +// +// The asm code generally follows the pure Go code in decode_other.go, except +// where marked with a "!!!". +// +// All local variables fit into registers. The non-zero stack size is only to +// spill registers and push args when issuing a CALL. 
The register allocation: +// - AX scratch +// - BX scratch +// - CX length or x +// - DX offset +// - SI &src[s] +// - DI &dst[d] +// + R8 dst_base +// + R9 dst_len +// + R10 dst_base + dst_len +// + R11 src_base +// + R12 src_len +// + R13 src_base + src_len +// - R14 used by doCopy +// - R15 used by doCopy +// +// The registers R8-R13 (marked with a "+") are set at the start of the +// function, and after a CALL returns, and are not otherwise modified. +// +// The d variable is implicitly DI - R8, and len(dst)-d is R10 - DI. +// The s variable is implicitly SI - R11, and len(src)-s is R13 - SI. +TEXT ·decode(SB), NOSPLIT, $48-56 + // Initialize SI, DI and R8-R13. + MOVQ dst_base+0(FP), R8 + MOVQ dst_len+8(FP), R9 + MOVQ R8, DI + MOVQ R8, R10 + ADDQ R9, R10 + MOVQ src_base+24(FP), R11 + MOVQ src_len+32(FP), R12 + MOVQ R11, SI + MOVQ R11, R13 + ADDQ R12, R13 + +loop: + // for s < len(src) + CMPQ SI, R13 + JEQ end + + // CX = uint32(src[s]) + // + // switch src[s] & 0x03 + MOVBLZX (SI), CX + MOVL CX, BX + ANDL $3, BX + CMPL BX, $1 + JAE tagCopy + + // ---------------------------------------- + // The code below handles literal tags. + + // case tagLiteral: + // x := uint32(src[s] >> 2) + // switch + SHRL $2, CX + CMPL CX, $60 + JAE tagLit60Plus + + // case x < 60: + // s++ + INCQ SI + +doLit: + // This is the end of the inner "switch", when we have a literal tag. + // + // We assume that CX == x and x fits in a uint32, where x is the variable + // used in the pure Go decode_other.go code. + + // length = int(x) + 1 + // + // Unlike the pure Go code, we don't need to check if length <= 0 because + // CX can hold 64 bits, so the increment cannot overflow. + INCQ CX + + // Prepare to check if copying length bytes will run past the end of dst or + // src. + // + // AX = len(dst) - d + // BX = len(src) - s + MOVQ R10, AX + SUBQ DI, AX + MOVQ R13, BX + SUBQ SI, BX + + // !!! Try a faster technique for short (16 or fewer bytes) copies. 
+ // + // if length > 16 || len(dst)-d < 16 || len(src)-s < 16 { + // goto callMemmove // Fall back on calling runtime·memmove. + // } + // + // The C++ snappy code calls this TryFastAppend. It also checks len(src)-s + // against 21 instead of 16, because it cannot assume that all of its input + // is contiguous in memory and so it needs to leave enough source bytes to + // read the next tag without refilling buffers, but Go's Decode assumes + // contiguousness (the src argument is a []byte). + CMPQ CX, $16 + JGT callMemmove + CMPQ AX, $16 + JLT callMemmove + CMPQ BX, $16 + JLT callMemmove + + // !!! Implement the copy from src to dst as a 16-byte load and store. + // (Decode's documentation says that dst and src must not overlap.) + // + // This always copies 16 bytes, instead of only length bytes, but that's + // OK. If the input is a valid Snappy encoding then subsequent iterations + // will fix up the overrun. Otherwise, Decode returns a nil []byte (and a + // non-nil error), so the overrun will be ignored. + // + // Note that on amd64, it is legal and cheap to issue unaligned 8-byte or + // 16-byte loads and stores. This technique probably wouldn't be as + // effective on architectures that are fussier about alignment. + MOVOU 0(SI), X0 + MOVOU X0, 0(DI) + + // d += length + // s += length + ADDQ CX, DI + ADDQ CX, SI + JMP loop + +callMemmove: + // if length > len(dst)-d || length > len(src)-s { etc } + CMPQ CX, AX + JGT errCorrupt + CMPQ CX, BX + JGT errCorrupt + + // copy(dst[d:], src[s:s+length]) + // + // This means calling runtime·memmove(&dst[d], &src[s], length), so we push + // DI, SI and CX as arguments. Coincidentally, we also need to spill those + // three registers to the stack, to save local variables across the CALL. + MOVQ DI, 0(SP) + MOVQ SI, 8(SP) + MOVQ CX, 16(SP) + MOVQ DI, 24(SP) + MOVQ SI, 32(SP) + MOVQ CX, 40(SP) + CALL runtime·memmove(SB) + + // Restore local variables: unspill registers from the stack and + // re-calculate R8-R13. 
+ MOVQ 24(SP), DI + MOVQ 32(SP), SI + MOVQ 40(SP), CX + MOVQ dst_base+0(FP), R8 + MOVQ dst_len+8(FP), R9 + MOVQ R8, R10 + ADDQ R9, R10 + MOVQ src_base+24(FP), R11 + MOVQ src_len+32(FP), R12 + MOVQ R11, R13 + ADDQ R12, R13 + + // d += length + // s += length + ADDQ CX, DI + ADDQ CX, SI + JMP loop + +tagLit60Plus: + // !!! This fragment does the + // + // s += x - 58; if uint(s) > uint(len(src)) { etc } + // + // checks. In the asm version, we code it once instead of once per switch case. + ADDQ CX, SI + SUBQ $58, SI + MOVQ SI, BX + SUBQ R11, BX + CMPQ BX, R12 + JA errCorrupt + + // case x == 60: + CMPL CX, $61 + JEQ tagLit61 + JA tagLit62Plus + + // x = uint32(src[s-1]) + MOVBLZX -1(SI), CX + JMP doLit + +tagLit61: + // case x == 61: + // x = uint32(src[s-2]) | uint32(src[s-1])<<8 + MOVWLZX -2(SI), CX + JMP doLit + +tagLit62Plus: + CMPL CX, $62 + JA tagLit63 + + // case x == 62: + // x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 + MOVWLZX -3(SI), CX + MOVBLZX -1(SI), BX + SHLL $16, BX + ORL BX, CX + JMP doLit + +tagLit63: + // case x == 63: + // x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 + MOVL -4(SI), CX + JMP doLit + +// The code above handles literal tags. +// ---------------------------------------- +// The code below handles copy tags. + +tagCopy2: + // case tagCopy2: + // s += 3 + ADDQ $3, SI + + // if uint(s) > uint(len(src)) { etc } + MOVQ SI, BX + SUBQ R11, BX + CMPQ BX, R12 + JA errCorrupt + + // length = 1 + int(src[s-3])>>2 + SHRQ $2, CX + INCQ CX + + // offset = int(src[s-2]) | int(src[s-1])<<8 + MOVWQZX -2(SI), DX + JMP doCopy + +tagCopy: + // We have a copy tag. 
We assume that: + // - BX == src[s] & 0x03 + // - CX == src[s] + CMPQ BX, $2 + JEQ tagCopy2 + JA errUC4T + + // case tagCopy1: + // s += 2 + ADDQ $2, SI + + // if uint(s) > uint(len(src)) { etc } + MOVQ SI, BX + SUBQ R11, BX + CMPQ BX, R12 + JA errCorrupt + + // offset = int(src[s-2])&0xe0<<3 | int(src[s-1]) + MOVQ CX, DX + ANDQ $0xe0, DX + SHLQ $3, DX + MOVBQZX -1(SI), BX + ORQ BX, DX + + // length = 4 + int(src[s-2])>>2&0x7 + SHRQ $2, CX + ANDQ $7, CX + ADDQ $4, CX + +doCopy: + // This is the end of the outer "switch", when we have a copy tag. + // + // We assume that: + // - CX == length && CX > 0 + // - DX == offset + + // if offset <= 0 { etc } + CMPQ DX, $0 + JLE errCorrupt + + // if d < offset { etc } + MOVQ DI, BX + SUBQ R8, BX + CMPQ BX, DX + JLT errCorrupt + + // if length > len(dst)-d { etc } + MOVQ R10, BX + SUBQ DI, BX + CMPQ CX, BX + JGT errCorrupt + + // forwardCopy(dst[d:d+length], dst[d-offset:]); d += length + // + // Set: + // - R14 = len(dst)-d + // - R15 = &dst[d-offset] + MOVQ R10, R14 + SUBQ DI, R14 + MOVQ DI, R15 + SUBQ DX, R15 + + // !!! Try a faster technique for short (16 or fewer bytes) forward copies. + // + // First, try using two 8-byte load/stores, similar to the doLit technique + // above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is + // still OK if offset >= 8. Note that this has to be two 8-byte load/stores + // and not one 16-byte load/store, and the first store has to be before the + // second load, due to the overlap if offset is in the range [8, 16). + // + // if length > 16 || offset < 8 || len(dst)-d < 16 { + // goto slowForwardCopy + // } + // copy 16 bytes + // d += length + CMPQ CX, $16 + JGT slowForwardCopy + CMPQ DX, $8 + JLT slowForwardCopy + CMPQ R14, $16 + JLT slowForwardCopy + MOVQ 0(R15), AX + MOVQ AX, 0(DI) + MOVQ 8(R15), BX + MOVQ BX, 8(DI) + ADDQ CX, DI + JMP loop + +slowForwardCopy: + // !!! 
If the forward copy is longer than 16 bytes, or if offset < 8, we + // can still try 8-byte load stores, provided we can overrun up to 10 extra + // bytes. As above, the overrun will be fixed up by subsequent iterations + // of the outermost loop. + // + // The C++ snappy code calls this technique IncrementalCopyFastPath. Its + // commentary says: + // + // ---- + // + // The main part of this loop is a simple copy of eight bytes at a time + // until we've copied (at least) the requested amount of bytes. However, + // if d and d-offset are less than eight bytes apart (indicating a + // repeating pattern of length < 8), we first need to expand the pattern in + // order to get the correct results. For instance, if the buffer looks like + // this, with the eight-byte <d-offset> and <d> patterns marked as + // intervals: + // + // abxxxxxxxxxxxx + // [------] d-offset + // [------] d + // + // a single eight-byte copy from <d-offset> to <d> will repeat the pattern + // once, after which we can move <d> two bytes without moving <d-offset>: + // + // ababxxxxxxxxxx + // [------] d-offset + // [------] d + // + // and repeat the exercise until the two no longer overlap. + // + // This allows us to do very well in the special case of one single byte + // repeated many times, without taking a big hit for more general cases. + // + // The worst case of extra writing past the end of the match occurs when + // offset == 1 and length == 1; the last copy will read from byte positions + // [0..7] and write to [4..11], whereas it was only supposed to write to + // position 1. Thus, ten excess bytes. + // + // ---- + // + // That "10 byte overrun" worst case is confirmed by Go's + // TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy + // and finishSlowForwardCopy algorithm. + // + // if length > len(dst)-d-10 { + // goto verySlowForwardCopy + // } + SUBQ $10, R14 + CMPQ CX, R14 + JGT verySlowForwardCopy + +makeOffsetAtLeast8: + // !!! 
As above, expand the pattern so that offset >= 8 and we can use + // 8-byte load/stores. + // + // for offset < 8 { + // copy 8 bytes from dst[d-offset:] to dst[d:] + // length -= offset + // d += offset + // offset += offset + // // The two previous lines together means that d-offset, and therefore + // // R15, is unchanged. + // } + CMPQ DX, $8 + JGE fixUpSlowForwardCopy + MOVQ (R15), BX + MOVQ BX, (DI) + SUBQ DX, CX + ADDQ DX, DI + ADDQ DX, DX + JMP makeOffsetAtLeast8 + +fixUpSlowForwardCopy: + // !!! Add length (which might be negative now) to d (implied by DI being + // &dst[d]) so that d ends up at the right place when we jump back to the + // top of the loop. Before we do that, though, we save DI to AX so that, if + // length is positive, copying the remaining length bytes will write to the + // right place. + MOVQ DI, AX + ADDQ CX, DI + +finishSlowForwardCopy: + // !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative + // length means that we overrun, but as above, that will be fixed up by + // subsequent iterations of the outermost loop. + CMPQ CX, $0 + JLE loop + MOVQ (R15), BX + MOVQ BX, (AX) + ADDQ $8, R15 + ADDQ $8, AX + SUBQ $8, CX + JMP finishSlowForwardCopy + +verySlowForwardCopy: + // verySlowForwardCopy is a simple implementation of forward copy. In C + // parlance, this is a do/while loop instead of a while loop, since we know + // that length > 0. In Go syntax: + // + // for { + // dst[d] = dst[d - offset] + // d++ + // length-- + // if length == 0 { + // break + // } + // } + MOVB (R15), BX + MOVB BX, (DI) + INCQ R15 + INCQ DI + DECQ CX + JNZ verySlowForwardCopy + JMP loop + +// The code above handles copy tags. +// ---------------------------------------- + +end: + // This is the end of the "for s < len(src)". 
+ // + // if d != len(dst) { etc } + CMPQ DI, R10 + JNE errCorrupt + + // return 0 + MOVQ $0, ret+48(FP) + RET + +errCorrupt: + // return decodeErrCodeCorrupt + MOVQ $1, ret+48(FP) + RET + +errUC4T: + // return decodeErrCodeUnsupportedCopy4Tag + MOVQ $3, ret+48(FP) + RET diff --git a/Godeps/_workspace/src/github.com/golang/snappy/decode_other.go b/Godeps/_workspace/src/github.com/golang/snappy/decode_other.go new file mode 100644 index 0000000000000000000000000000000000000000..1a8114ab1f6a70890e0280ed78fd70e2c81ef74a --- /dev/null +++ b/Godeps/_workspace/src/github.com/golang/snappy/decode_other.go @@ -0,0 +1,96 @@ +// Copyright 2016 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64 + +package snappy + +// decode writes the decoding of src to dst. It assumes that the varint-encoded +// length of the decompressed bytes has already been read, and that len(dst) +// equals that length. +// +// It returns 0 on success or a decodeErrCodeXxx error code on failure. +func decode(dst, src []byte) int { + var d, s, offset, length int + for s < len(src) { + switch src[s] & 0x03 { + case tagLiteral: + x := uint32(src[s] >> 2) + switch { + case x < 60: + s++ + case x == 60: + s += 2 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-1]) + case x == 61: + s += 3 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-2]) | uint32(src[s-1])<<8 + case x == 62: + s += 4 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. 
+ return decodeErrCodeCorrupt + } + x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 + case x == 63: + s += 5 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 + } + length = int(x) + 1 + if length <= 0 { + return decodeErrCodeUnsupportedLiteralLength + } + if length > len(dst)-d || length > len(src)-s { + return decodeErrCodeCorrupt + } + copy(dst[d:], src[s:s+length]) + d += length + s += length + continue + + case tagCopy1: + s += 2 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + length = 4 + int(src[s-2])>>2&0x7 + offset = int(src[s-2])&0xe0<<3 | int(src[s-1]) + + case tagCopy2: + s += 3 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + length = 1 + int(src[s-3])>>2 + offset = int(src[s-2]) | int(src[s-1])<<8 + + case tagCopy4: + return decodeErrCodeUnsupportedCopy4Tag + } + + if offset <= 0 || d < offset || length > len(dst)-d { + return decodeErrCodeCorrupt + } + // Copy from an earlier sub-slice of dst to a later sub-slice. Unlike + // the built-in copy function, this byte-by-byte copy always runs + // forwards, even if the slices overlap. 
Conceptually, this is: + // + // d += forwardCopy(dst[d:d+length], dst[d-offset:]) + for end := d + length; d != end; d++ { + dst[d] = dst[d-offset] + } + } + if d != len(dst) { + return decodeErrCodeCorrupt + } + return 0 +} diff --git a/Godeps/_workspace/src/github.com/golang/snappy/encode.go b/Godeps/_workspace/src/github.com/golang/snappy/encode.go index b9fe4dcbbf5ef7e0459a5cd93bf1fc7b52939636..38ebe952e014a9593e52a12fd5e9dc9660ae0cf9 100644 --- a/Godeps/_workspace/src/github.com/golang/snappy/encode.go +++ b/Godeps/_workspace/src/github.com/golang/snappy/encode.go @@ -10,7 +10,8 @@ import ( "io" ) -// We limit how far copy back-references can go, the same as the C++ code. +// maxOffset limits how far copy back-references can go, the same as the C++ +// code. const maxOffset = 1 << 15 // emitLiteral writes a literal chunk and returns the number of bytes written. @@ -94,30 +95,22 @@ func Encode(dst, src []byte) []byte { for len(src) > 0 { p := src src = nil - if len(p) > maxInternalEncodeSrcLen { - p, src = p[:maxInternalEncodeSrcLen], p[maxInternalEncodeSrcLen:] + if len(p) > maxBlockSize { + p, src = p[:maxBlockSize], p[maxBlockSize:] } - d += encode(dst[d:], p) + d += encodeBlock(dst[d:], p) } return dst[:d] } -// maxInternalEncodeSrcLen must be less than math.MaxInt32, so that in the -// (internal) encode function, it is safe to have the s variable (which indexes -// the src slice), and therefore the hash table entries, to have type int32 -// instead of int. -const maxInternalEncodeSrcLen = 0x40000000 - -// encode encodes a non-empty src to a guaranteed-large-enough dst. It assumes -// that the varint-encoded length of the decompressed bytes has already been -// written. +// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It +// assumes that the varint-encoded length of the decompressed bytes has already +// been written. 
// // It also assumes that: // len(dst) >= MaxEncodedLen(len(src)) && -// 0 < len(src) && -// len(src) <= maxInternalEncodeSrcLen && -// maxInternalEncodeSrcLen < math.MaxInt32. -func encode(dst, src []byte) (d int) { +// 0 < len(src) && len(src) <= maxBlockSize +func encodeBlock(dst, src []byte) (d int) { // Return early if src is short. if len(src) <= 4 { return emitLiteral(dst, src) @@ -137,6 +130,22 @@ func encode(dst, src []byte) (d int) { s int32 // The iterator position. t int32 // The last position with the same hash as s. lit int32 // The start position of any pending literal bytes. + + // Copied from the C++ snappy implementation: + // + // Heuristic match skipping: If 32 bytes are scanned with no matches + // found, start looking only at every other byte. If 32 more bytes are + // scanned, look at every third byte, etc.. When a match is found, + // immediately go back to looking at every byte. This is a small loss + // (~5% performance, ~0.1% density) for compressible data due to more + // bookkeeping, but for non-compressible data (such as JPEG) it's a + // huge win since the compressor quickly "realizes" the data is + // incompressible and doesn't bother looking for matches everywhere. + // + // The "skip" variable keeps track of how many bytes there are since + // the last match; dividing it by 32 (ie. right-shifting by five) gives + // the number of bytes to move ahead for each iteration. + skip uint32 = 32 ) for uint32(s+3) < uint32(len(src)) { // The uint32 conversions catch overflow from the +3. // Update the hash table. @@ -150,10 +159,11 @@ func encode(dst, src []byte) (d int) { t, *p = *p-1, s+1 // If t is invalid or src[s:s+4] differs from src[t:t+4], accumulate a literal byte. if t < 0 || s-t >= maxOffset || b0 != src[t] || b1 != src[t+1] || b2 != src[t+2] || b3 != src[t+3] { - // Skip multiple bytes if the last match was >= 32 bytes prior. 
- s += 1 + (s-lit)>>5 + s += int32(skip >> 5) + skip++ continue } + skip = 32 // Otherwise, we have a match. First, emit any pending literal bytes. if lit != s { d += emitLiteral(dst[d:], src[lit:s]) @@ -241,7 +251,7 @@ func NewWriter(w io.Writer) *Writer { func NewBufferedWriter(w io.Writer) *Writer { return &Writer{ w: w, - ibuf: make([]byte, 0, maxUncompressedChunkLen), + ibuf: make([]byte, 0, maxBlockSize), obuf: make([]byte, obufLen), } } @@ -325,8 +335,8 @@ func (w *Writer) write(p []byte) (nRet int, errRet error) { } var uncompressed []byte - if len(p) > maxUncompressedChunkLen { - uncompressed, p = p[:maxUncompressedChunkLen], p[maxUncompressedChunkLen:] + if len(p) > maxBlockSize { + uncompressed, p = p[:maxBlockSize], p[maxBlockSize:] } else { uncompressed, p = p, nil } diff --git a/Godeps/_workspace/src/github.com/golang/snappy/snappy.go b/Godeps/_workspace/src/github.com/golang/snappy/snappy.go index ef1e33e00bd68299030c0eed13a1c1bbbc7f0105..bb54bf626c7ea88410b2ea055476998c96f89599 100644 --- a/Godeps/_workspace/src/github.com/golang/snappy/snappy.go +++ b/Godeps/_workspace/src/github.com/golang/snappy/snappy.go @@ -46,18 +46,25 @@ const ( chunkHeaderSize = 4 magicChunk = "\xff\x06\x00\x00" + magicBody magicBody = "sNaPpY" + + // maxBlockSize is the maximum size of the input to encodeBlock. It is not + // part of the wire format per se, but some parts of the encoder assume + // that an offset fits into a uint16. + // + // Also, for the framing format (Writer type instead of Encode function), // https://github.com/google/snappy/blob/master/framing_format.txt says - // that "the uncompressed data in a chunk must be no longer than 65536 bytes". - maxUncompressedChunkLen = 65536 + // that "the uncompressed data in a chunk must be no longer than 65536 + // bytes". 
+ maxBlockSize = 65536 - // maxEncodedLenOfMaxUncompressedChunkLen equals - // MaxEncodedLen(maxUncompressedChunkLen), but is hard coded to be a const - // instead of a variable, so that obufLen can also be a const. Their - // equivalence is confirmed by TestMaxEncodedLenOfMaxUncompressedChunkLen. - maxEncodedLenOfMaxUncompressedChunkLen = 76490 + // maxEncodedLenOfMaxBlockSize equals MaxEncodedLen(maxBlockSize), but is + // hard coded to be a const instead of a variable, so that obufLen can also + // be a const. Their equivalence is confirmed by + // TestMaxEncodedLenOfMaxBlockSize. + maxEncodedLenOfMaxBlockSize = 76490 obufHeaderLen = len(magicChunk) + checksumSize + chunkHeaderSize - obufLen = obufHeaderLen + maxEncodedLenOfMaxUncompressedChunkLen + obufLen = obufHeaderLen + maxEncodedLenOfMaxBlockSize ) const ( diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch.go index 6fd059080375f67c0e891bcb3583d5633284e41e..c7cf6c428976f41f5fa44df40fcd1286615e42cb 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/batch.go @@ -15,6 +15,7 @@ import ( "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage" ) +// ErrBatchCorrupted records reason of batch corruption. type ErrBatchCorrupted struct { Reason string } @@ -32,6 +33,7 @@ const ( batchGrowRec = 3000 ) +// BatchReplay wraps basic batch operations. 
type BatchReplay interface { Put(key, value []byte) Delete(key []byte) @@ -68,20 +70,20 @@ func (b *Batch) grow(n int) { } } -func (b *Batch) appendRec(kt kType, key, value []byte) { +func (b *Batch) appendRec(kt keyType, key, value []byte) { n := 1 + binary.MaxVarintLen32 + len(key) - if kt == ktVal { + if kt == keyTypeVal { n += binary.MaxVarintLen32 + len(value) } b.grow(n) off := len(b.data) data := b.data[:off+n] data[off] = byte(kt) - off += 1 + off++ off += binary.PutUvarint(data[off:], uint64(len(key))) copy(data[off:], key) off += len(key) - if kt == ktVal { + if kt == keyTypeVal { off += binary.PutUvarint(data[off:], uint64(len(value))) copy(data[off:], value) off += len(value) @@ -95,13 +97,13 @@ func (b *Batch) appendRec(kt kType, key, value []byte) { // Put appends 'put operation' of the given key/value pair to the batch. // It is safe to modify the contents of the argument after Put returns. func (b *Batch) Put(key, value []byte) { - b.appendRec(ktVal, key, value) + b.appendRec(keyTypeVal, key, value) } // Delete appends 'delete operation' of the given key to the batch. // It is safe to modify the contents of the argument after Delete returns. func (b *Batch) Delete(key []byte) { - b.appendRec(ktDel, key, nil) + b.appendRec(keyTypeDel, key, nil) } // Dump dumps batch contents. The returned slice can be loaded into the @@ -122,11 +124,11 @@ func (b *Batch) Load(data []byte) error { // Replay replays batch contents. func (b *Batch) Replay(r BatchReplay) error { - return b.decodeRec(func(i int, kt kType, key, value []byte) error { + return b.decodeRec(func(i int, kt keyType, key, value []byte) error { switch kt { - case ktVal: + case keyTypeVal: r.Put(key, value) - case ktDel: + case keyTypeDel: r.Delete(key) } return nil @@ -156,6 +158,7 @@ func (b *Batch) append(p *Batch) { b.grow(len(p.data) - batchHdrLen) b.data = append(b.data, p.data[batchHdrLen:]...) 
b.rLen += p.rLen + b.bLen += p.bLen } if p.sync { b.sync = true @@ -195,18 +198,18 @@ func (b *Batch) decode(prevSeq uint64, data []byte) error { return nil } -func (b *Batch) decodeRec(f func(i int, kt kType, key, value []byte) error) error { +func (b *Batch) decodeRec(f func(i int, kt keyType, key, value []byte) error) error { off := batchHdrLen for i := 0; i < b.rLen; i++ { if off >= len(b.data) { return newErrBatchCorrupted("invalid records length") } - kt := kType(b.data[off]) - if kt > ktVal { + kt := keyType(b.data[off]) + if kt > keyTypeVal { return newErrBatchCorrupted("bad record: invalid type") } - off += 1 + off++ x, n := binary.Uvarint(b.data[off:]) off += n @@ -216,7 +219,7 @@ func (b *Batch) decodeRec(f func(i int, kt kType, key, value []byte) error) erro key := b.data[off : off+int(x)] off += int(x) var value []byte - if kt == ktVal { + if kt == keyTypeVal { x, n := binary.Uvarint(b.data[off:]) off += n if n <= 0 || off+int(x) > len(b.data) { @@ -236,8 +239,8 @@ func (b *Batch) decodeRec(f func(i int, kt kType, key, value []byte) error) erro func (b *Batch) memReplay(to *memdb.DB) error { var ikScratch []byte - return b.decodeRec(func(i int, kt kType, key, value []byte) error { - ikScratch = makeIkey(ikScratch, key, b.seq+uint64(i), kt) + return b.decodeRec(func(i int, kt keyType, key, value []byte) error { + ikScratch = makeInternalKey(ikScratch, key, b.seq+uint64(i), kt) return to.Put(ikScratch, value) }) } @@ -251,8 +254,8 @@ func (b *Batch) memDecodeAndReplay(prevSeq uint64, data []byte, to *memdb.DB) er func (b *Batch) revertMemReplay(to *memdb.DB) error { var ikScratch []byte - return b.decodeRec(func(i int, kt kType, key, value []byte) error { - ikScratch := makeIkey(ikScratch, key, b.seq+uint64(i), kt) + return b.decodeRec(func(i int, kt keyType, key, value []byte) error { - ikScratch := makeInternalKey(ikScratch, key, b.seq+uint64(i), kt) return to.Delete(ikScratch) }) } diff --git 
a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache.go index a6c27320c896a52f51d3cc62808d0b3f9d71a9e4..b6f647b94341dfa79f2b54bb89346603007a6370 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/cache/cache.go @@ -47,17 +47,21 @@ type Cacher interface { // so the the Release method will be called once object is released. type Value interface{} -type CacheGetter struct { +// NamespaceGetter provides convenient wrapper for namespace. +type NamespaceGetter struct { Cache *Cache NS uint64 } -func (g *CacheGetter) Get(key uint64, setFunc func() (size int, value Value)) *Handle { +// Get simply calls Cache.Get() method. +func (g *NamespaceGetter) Get(key uint64, setFunc func() (size int, value Value)) *Handle { return g.Cache.Get(g.NS, key, setFunc) } // The hash tables implementation is based on: -// "Dynamic-Sized Nonblocking Hash Tables", by Yujie Liu, Kunlong Zhang, and Michael Spear. ACM Symposium on Principles of Distributed Computing, Jul 2014. +// "Dynamic-Sized Nonblocking Hash Tables", by Yujie Liu, +// Kunlong Zhang, and Michael Spear. +// ACM Symposium on Principles of Distributed Computing, Jul 2014. const ( mInitialSize = 1 << 4 @@ -610,10 +614,12 @@ func (n *Node) unrefLocked() { } } +// Handle is a 'cache handle' of a 'cache node'. type Handle struct { n unsafe.Pointer // *Node } +// Value returns the value of the 'cache node'. func (h *Handle) Value() Value { n := (*Node)(atomic.LoadPointer(&h.n)) if n != nil { @@ -622,6 +628,8 @@ func (h *Handle) Value() Value { return nil } +// Release releases this 'cache handle'. +// It is safe to call release multiple times. 
func (h *Handle) Release() { nPtr := atomic.LoadPointer(&h.n) if nPtr != nil && atomic.CompareAndSwapPointer(&h.n, nPtr, nil) { diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/comparer.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/comparer.go index 610b4820d0338de3f4b6361815a5df4f13608f61..4674a6fe86dcfef0b0c7aadc545a6f3c61332b00 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/comparer.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/comparer.go @@ -33,9 +33,9 @@ func (icmp *iComparer) Name() string { } func (icmp *iComparer) Compare(a, b []byte) int { - x := icmp.ucmp.Compare(iKey(a).ukey(), iKey(b).ukey()) + x := icmp.ucmp.Compare(internalKey(a).ukey(), internalKey(b).ukey()) if x == 0 { - if m, n := iKey(a).num(), iKey(b).num(); m > n { + if m, n := internalKey(a).num(), internalKey(b).num(); m > n { x = -1 } else if m < n { x = 1 @@ -45,13 +45,13 @@ func (icmp *iComparer) Compare(a, b []byte) int { } func (icmp *iComparer) Separator(dst, a, b []byte) []byte { - ua, ub := iKey(a).ukey(), iKey(b).ukey() + ua, ub := internalKey(a).ukey(), internalKey(b).ukey() dst = icmp.ucmp.Separator(dst, ua, ub) if dst == nil { return nil } if len(dst) < len(ua) && icmp.uCompare(ua, dst) < 0 { - dst = append(dst, kMaxNumBytes...) + dst = append(dst, keyMaxNumBytes...) } else { // Did not close possibilities that n maybe longer than len(ub). dst = append(dst, a[len(a)-8:]...) @@ -60,13 +60,13 @@ func (icmp *iComparer) Separator(dst, a, b []byte) []byte { } func (icmp *iComparer) Successor(dst, b []byte) []byte { - ub := iKey(b).ukey() + ub := internalKey(b).ukey() dst = icmp.ucmp.Successor(dst, ub) if dst == nil { return nil } if len(dst) < len(ub) && icmp.uCompare(ub, dst) < 0 { - dst = append(dst, kMaxNumBytes...) + dst = append(dst, keyMaxNumBytes...) } else { // Did not close possibilities that n maybe longer than len(ub). dst = append(dst, b[len(b)-8:]...) 
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db.go index 5a0b6187f7dfea1cf420442bfa436dd8ef535487..822dde78586c3ecf93d4f04fef6b8d3a194bd6d2 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db.go @@ -315,7 +315,7 @@ func recoverTable(s *session, o *opt.Options) error { tw := table.NewWriter(writer, o) for iter.Next() { key := iter.Key() - if validIkey(key) { + if validInternalKey(key) { err = tw.Append(key, iter.Value()) if err != nil { return @@ -380,7 +380,7 @@ func recoverTable(s *session, o *opt.Options) error { // Scan the table. for iter.Next() { key := iter.Key() - _, seq, _, kerr := parseIkey(key) + _, seq, _, kerr := parseInternalKey(key) if kerr != nil { tcorruptedKey++ continue @@ -472,15 +472,15 @@ func recoverTable(s *session, o *opt.Options) error { func (db *DB) recoverJournal() error { // Get all journals and sort it by file number. - fds_, err := db.s.stor.List(storage.TypeJournal) + rawFds, err := db.s.stor.List(storage.TypeJournal) if err != nil { return err } - sortFds(fds_) + sortFds(rawFds) // Journals that will be recovered. var fds []storage.FileDesc - for _, fd := range fds_ { + for _, fd := range rawFds { if fd.Num >= db.s.stJournalNum || fd.Num == db.s.stPrevJournalNum { fds = append(fds, fd) } @@ -633,15 +633,15 @@ func (db *DB) recoverJournal() error { func (db *DB) recoverJournalRO() error { // Get all journals and sort it by file number. - fds_, err := db.s.stor.List(storage.TypeJournal) + rawFds, err := db.s.stor.List(storage.TypeJournal) if err != nil { return err } - sortFds(fds_) + sortFds(rawFds) // Journals that will be recovered. 
var fds []storage.FileDesc - for _, fd := range fds_ { + for _, fd := range rawFds { if fd.Num >= db.s.stJournalNum || fd.Num == db.s.stPrevJournalNum { fds = append(fds, fd) } @@ -728,16 +728,16 @@ func (db *DB) recoverJournalRO() error { return nil } -func memGet(mdb *memdb.DB, ikey iKey, icmp *iComparer) (ok bool, mv []byte, err error) { +func memGet(mdb *memdb.DB, ikey internalKey, icmp *iComparer) (ok bool, mv []byte, err error) { mk, mv, err := mdb.Find(ikey) if err == nil { - ukey, _, kt, kerr := parseIkey(mk) + ukey, _, kt, kerr := parseInternalKey(mk) if kerr != nil { // Shouldn't have had happen. panic(kerr) } if icmp.uCompare(ukey, ikey.ukey()) == 0 { - if kt == ktDel { + if kt == keyTypeDel { return true, nil, ErrNotFound } return true, mv, nil @@ -750,7 +750,7 @@ func memGet(mdb *memdb.DB, ikey iKey, icmp *iComparer) (ok bool, mv []byte, err } func (db *DB) get(auxm *memdb.DB, auxt tFiles, key []byte, seq uint64, ro *opt.ReadOptions) (value []byte, err error) { - ikey := makeIkey(nil, key, seq, ktSeek) + ikey := makeInternalKey(nil, key, seq, keyTypeSeek) if auxm != nil { if ok, mv, me := memGet(auxm, ikey, db.s.icmp); ok { @@ -788,7 +788,7 @@ func nilIfNotFound(err error) error { } func (db *DB) has(auxm *memdb.DB, auxt tFiles, key []byte, seq uint64, ro *opt.ReadOptions) (ret bool, err error) { - ikey := makeIkey(nil, key, seq, ktSeek) + ikey := makeInternalKey(nil, key, seq, keyTypeSeek) if auxm != nil { if ok, _, me := memGet(auxm, ikey, db.s.icmp); ok { @@ -997,8 +997,8 @@ func (db *DB) SizeOf(ranges []util.Range) (Sizes, error) { sizes := make(Sizes, 0, len(ranges)) for _, r := range ranges { - imin := makeIkey(nil, r.Start, kMaxSeq, ktSeek) - imax := makeIkey(nil, r.Limit, kMaxSeq, ktSeek) + imin := makeInternalKey(nil, r.Start, keyMaxSeq, keyTypeSeek) + imax := makeInternalKey(nil, r.Limit, keyMaxSeq, keyTypeSeek) start, err := v.offsetOf(imin) if err != nil { return nil, err @@ -1007,7 +1007,7 @@ func (db *DB) SizeOf(ranges []util.Range) 
(Sizes, error) { if err != nil { return nil, err } - var size uint64 + var size int64 if limit >= start { size = limit - start } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_compaction.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_compaction.go index 47ef40fc409d817aaf588db7ce12aef521f15be8..668815a6e0a8bb50bc62b50d1af808677abb23d5 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_compaction.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_compaction.go @@ -452,7 +452,7 @@ func (b *tableCompactionBuilder) run(cnt *compactionTransactCounter) error { } ikey := iter.Key() - ukey, seq, kt, kerr := parseIkey(ikey) + ukey, seq, kt, kerr := parseInternalKey(ikey) if kerr == nil { shouldStop := !resumed && b.c.shouldStopBefore(ikey) @@ -478,14 +478,14 @@ func (b *tableCompactionBuilder) run(cnt *compactionTransactCounter) error { hasLastUkey = true lastUkey = append(lastUkey[:0], ukey...) - lastSeq = kMaxSeq + lastSeq = keyMaxSeq } switch { case lastSeq <= b.minSeq: // Dropped because newer entry for same user key exist fallthrough // (A) - case kt == ktDel && seq <= b.minSeq && b.c.baseLevelForKey(lastUkey): + case kt == keyTypeDel && seq <= b.minSeq && b.c.baseLevelForKey(lastUkey): // For this user key: // (1) there is no data in higher levels // (2) data in lower levels will have larger seq numbers @@ -507,7 +507,7 @@ func (b *tableCompactionBuilder) run(cnt *compactionTransactCounter) error { // Don't drop corrupted keys. 
hasLastUkey = false lastUkey = lastUkey[:0] - lastSeq = kMaxSeq + lastSeq = keyMaxSeq b.kerrCnt++ } @@ -548,9 +548,7 @@ func (db *DB) tableCompaction(c *compaction, noTrivial bool) { db.logf("table@move L%d@%d -> L%d", c.sourceLevel, t.fd.Num, c.sourceLevel+1) rec.delTable(c.sourceLevel, t.fd.Num) rec.addTableFile(c.sourceLevel+1, t) - db.compactionTransactFunc("table@move", func(cnt *compactionTransactCounter) (err error) { - return db.s.commit(rec) - }, nil) + db.compactionCommit("table-move", rec) return } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_iter.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_iter.go index d6992d103d57037aab5b9369e23301e8d764842f..ccf4787327bbd1c3255c2d4ef8f390bce8e05b06 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_iter.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_iter.go @@ -19,7 +19,7 @@ import ( ) var ( - errInvalidIkey = errors.New("leveldb: Iterator: invalid internal key") + errInvalidInternalKey = errors.New("leveldb: Iterator: invalid internal key") ) type memdbReleaser struct { @@ -70,10 +70,10 @@ func (db *DB) newIterator(auxm *memDB, auxt tFiles, seq uint64, slice *util.Rang if slice != nil { islice = &util.Range{} if slice.Start != nil { - islice.Start = makeIkey(nil, slice.Start, kMaxSeq, ktSeek) + islice.Start = makeInternalKey(nil, slice.Start, keyMaxSeq, keyTypeSeek) } if slice.Limit != nil { - islice.Limit = makeIkey(nil, slice.Limit, kMaxSeq, ktSeek) + islice.Limit = makeInternalKey(nil, slice.Limit, keyMaxSeq, keyTypeSeek) } } rawIter := db.newRawIterator(auxm, auxt, islice, ro) @@ -187,7 +187,7 @@ func (i *dbIter) Seek(key []byte) bool { return false } - ikey := makeIkey(nil, key, i.seq, ktSeek) + ikey := makeInternalKey(nil, key, i.seq, keyTypeSeek) if i.iter.Seek(ikey) { i.dir = dirSOI return i.next() @@ -199,15 +199,15 @@ func (i *dbIter) Seek(key []byte) bool { func (i *dbIter) next() bool { for { - if ukey, seq, 
kt, kerr := parseIkey(i.iter.Key()); kerr == nil { + if ukey, seq, kt, kerr := parseInternalKey(i.iter.Key()); kerr == nil { i.sampleSeek() if seq <= i.seq { switch kt { - case ktDel: + case keyTypeDel: // Skip deleted key. i.key = append(i.key[:0], ukey...) i.dir = dirForward - case ktVal: + case keyTypeVal: if i.dir == dirSOI || i.icmp.uCompare(ukey, i.key) > 0 { i.key = append(i.key[:0], ukey...) i.value = append(i.value[:0], i.iter.Value()...) @@ -250,13 +250,13 @@ func (i *dbIter) prev() bool { del := true if i.iter.Valid() { for { - if ukey, seq, kt, kerr := parseIkey(i.iter.Key()); kerr == nil { + if ukey, seq, kt, kerr := parseInternalKey(i.iter.Key()); kerr == nil { i.sampleSeek() if seq <= i.seq { if !del && i.icmp.uCompare(ukey, i.key) < 0 { return true } - del = (kt == ktDel) + del = (kt == keyTypeDel) if !del { i.key = append(i.key[:0], ukey...) i.value = append(i.value[:0], i.iter.Value()...) @@ -292,7 +292,7 @@ func (i *dbIter) Prev() bool { return i.Last() case dirForward: for i.iter.Prev() { - if ukey, _, _, kerr := parseIkey(i.iter.Key()); kerr == nil { + if ukey, _, _, kerr := parseInternalKey(i.iter.Key()); kerr == nil { i.sampleSeek() if i.icmp.uCompare(ukey, i.key) < 0 { goto cont diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_state.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_state.go index 556ae38046a945182f1b084e3d0fbbf823bcd943..c497c22f6f7214680e6363c70bb7a853c88e0c54 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_state.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_state.go @@ -57,7 +57,7 @@ func (db *DB) setSeq(seq uint64) { atomic.StoreUint64(&db.seq, seq) } -func (db *DB) sampleSeek(ikey iKey) { +func (db *DB) sampleSeek(ikey internalKey) { v := db.s.version() if v.sampleSeek(ikey) { // Trigger table compaction. 
diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_transaction.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_transaction.go index 29ef9b494ae759329e5e64830ad26d160cd165b0..05d6f9586131e2c23951626afc1dd0bda44c4438 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_transaction.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_transaction.go @@ -108,8 +108,8 @@ func (tr *Transaction) flush() error { return nil } -func (tr *Transaction) put(kt kType, key, value []byte) error { - tr.ikScratch = makeIkey(tr.ikScratch, key, tr.seq+1, kt) +func (tr *Transaction) put(kt keyType, key, value []byte) error { + tr.ikScratch = makeInternalKey(tr.ikScratch, key, tr.seq+1, kt) if tr.mem.Free() < len(tr.ikScratch)+len(value) { if err := tr.flush(); err != nil { return err @@ -134,7 +134,7 @@ func (tr *Transaction) Put(key, value []byte, wo *opt.WriteOptions) error { if tr.closed { return errTransactionDone } - return tr.put(ktVal, key, value) + return tr.put(keyTypeVal, key, value) } // Delete deletes the value for the given key. @@ -148,7 +148,7 @@ func (tr *Transaction) Delete(key []byte, wo *opt.WriteOptions) error { if tr.closed { return errTransactionDone } - return tr.put(ktDel, key, nil) + return tr.put(keyTypeDel, key, nil) } // Write apply the given batch to the transaction. 
The batch will be applied @@ -167,7 +167,7 @@ func (tr *Transaction) Write(b *Batch, wo *opt.WriteOptions) error { if tr.closed { return errTransactionDone } - return b.decodeRec(func(i int, kt kType, key, value []byte) error { + return b.decodeRec(func(i int, kt keyType, key, value []byte) error { return tr.put(kt, key, value) }) } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_util.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_util.go index 7f484998622f4d11e63a74347bae98ad287d4f3b..3ade933be36fcd87ce221881acea441f327d0caa 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_util.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_util.go @@ -21,14 +21,16 @@ type Reader interface { NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator } -type Sizes []uint64 +// Sizes is list of size. +type Sizes []int64 // Sum returns sum of the sizes. -func (p Sizes) Sum() (n uint64) { - for _, s := range p { - n += s +func (sizes Sizes) Sum() int64 { + var sum int64 + for _, size := range sizes { + sum += size } - return n + return sum } // Logging. diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_write.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_write.go index 079f40a30395acd6ade22d2436b82448b29c8e27..7ea731832fd34c62af61d1a6ce021f40983fccaf 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_write.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/db_write.go @@ -166,15 +166,15 @@ func (db *DB) Write(b *Batch, wo *opt.WriteOptions) (err error) { merged := 0 danglingMerge := false defer func() { + for i := 0; i < merged; i++ { + db.writeAckC <- err + } if danglingMerge { // Only one dangling merge at most, so this is safe. 
db.writeMergedC <- false } else { <-db.writeLockC } - for i := 0; i < merged; i++ { - db.writeAckC <- err - } }() mdb, mdbFree, err := db.flush(b.size()) @@ -281,8 +281,8 @@ func (db *DB) Delete(key []byte, wo *opt.WriteOptions) error { func isMemOverlaps(icmp *iComparer, mem *memdb.DB, min, max []byte) bool { iter := mem.NewIterator(nil) defer iter.Release() - return (max == nil || (iter.First() && icmp.uCompare(max, iKey(iter.Key()).ukey()) >= 0)) && - (min == nil || (iter.Last() && icmp.uCompare(min, iKey(iter.Key()).ukey()) <= 0)) + return (max == nil || (iter.First() && icmp.uCompare(max, internalKey(iter.Key()).ukey()) >= 0)) && + (min == nil || (iter.Last() && icmp.uCompare(min, internalKey(iter.Key()).ukey()) <= 0)) } // CompactRange compacts the underlying DB for the given key range. diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter.go index 0db0e850297bd9a61abe6a6874c25c933d105235..18f4844caac13a7edb884f325e2c5cd7f68af138 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter.go @@ -15,7 +15,7 @@ type iFilter struct { } func (f iFilter) Contains(filter, key []byte) bool { - return f.Filter.Contains(filter, iKey(key).ukey()) + return f.Filter.Contains(filter, internalKey(key).ukey()) } func (f iFilter) NewGenerator() filter.FilterGenerator { @@ -27,5 +27,5 @@ type iFilterGenerator struct { } func (g iFilterGenerator) Add(key []byte) { - g.FilterGenerator.Add(iKey(key).ukey()) + g.FilterGenerator.Add(internalKey(key).ukey()) } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key.go index 14ec7438472aca38dadc057985499d1d0f8e0f53..95c704fb18674465d6fb248aa5c36d8265feae5b 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key.go +++ 
b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/key.go @@ -14,26 +14,27 @@ import ( "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage" ) -type ErrIkeyCorrupted struct { +// ErrInternalKeyCorrupted records internal key corruption. +type ErrInternalKeyCorrupted struct { Ikey []byte Reason string } -func (e *ErrIkeyCorrupted) Error() string { - return fmt.Sprintf("leveldb: iKey %q corrupted: %s", e.Ikey, e.Reason) +func (e *ErrInternalKeyCorrupted) Error() string { + return fmt.Sprintf("leveldb: internal key %q corrupted: %s", e.Ikey, e.Reason) } -func newErrIkeyCorrupted(ikey []byte, reason string) error { - return errors.NewErrCorrupted(storage.FileDesc{}, &ErrIkeyCorrupted{append([]byte{}, ikey...), reason}) +func newErrInternalKeyCorrupted(ikey []byte, reason string) error { + return errors.NewErrCorrupted(storage.FileDesc{}, &ErrInternalKeyCorrupted{append([]byte{}, ikey...), reason}) } -type kType int +type keyType uint -func (kt kType) String() string { +func (kt keyType) String() string { switch kt { - case ktDel: + case keyTypeDel: return "d" - case ktVal: + case keyTypeVal: return "v" } return "x" @@ -42,39 +43,39 @@ func (kt kType) String() string { // Value types encoded as the last component of internal keys. // Don't modify; this value are saved to disk. const ( - ktDel kType = iota - ktVal + keyTypeDel keyType = iota + keyTypeVal ) -// ktSeek defines the kType that should be passed when constructing an +// keyTypeSeek defines the keyType that should be passed when constructing an // internal key for seeking to a particular sequence number (since we // sort sequence numbers in decreasing order and the value type is // embedded as the low 8 bits in the sequence number in internal keys, // we need to use the highest-numbered ValueType, not the lowest). 
-const ktSeek = ktVal +const keyTypeSeek = keyTypeVal const ( // Maximum value possible for sequence number; the 8-bits are // used by value type, so its can packed together in single // 64-bit integer. - kMaxSeq uint64 = (uint64(1) << 56) - 1 + keyMaxSeq = (uint64(1) << 56) - 1 // Maximum value possible for packed sequence number and type. - kMaxNum uint64 = (kMaxSeq << 8) | uint64(ktSeek) + keyMaxNum = (keyMaxSeq << 8) | uint64(keyTypeSeek) ) // Maximum number encoded in bytes. -var kMaxNumBytes = make([]byte, 8) +var keyMaxNumBytes = make([]byte, 8) func init() { - binary.LittleEndian.PutUint64(kMaxNumBytes, kMaxNum) + binary.LittleEndian.PutUint64(keyMaxNumBytes, keyMaxNum) } -type iKey []byte +type internalKey []byte -func makeIkey(dst, ukey []byte, seq uint64, kt kType) iKey { - if seq > kMaxSeq { +func makeInternalKey(dst, ukey []byte, seq uint64, kt keyType) internalKey { + if seq > keyMaxSeq { panic("leveldb: invalid sequence number") - } else if kt > ktVal { + } else if kt > keyTypeVal { panic("leveldb: invalid type") } @@ -85,63 +86,62 @@ func makeIkey(dst, ukey []byte, seq uint64, kt kType) iKey { } copy(dst, ukey) binary.LittleEndian.PutUint64(dst[len(ukey):], (seq<<8)|uint64(kt)) - return iKey(dst) + return internalKey(dst) } -func parseIkey(ik []byte) (ukey []byte, seq uint64, kt kType, err error) { +func parseInternalKey(ik []byte) (ukey []byte, seq uint64, kt keyType, err error) { if len(ik) < 8 { - return nil, 0, 0, newErrIkeyCorrupted(ik, "invalid length") + return nil, 0, 0, newErrInternalKeyCorrupted(ik, "invalid length") } num := binary.LittleEndian.Uint64(ik[len(ik)-8:]) - seq, kt = uint64(num>>8), kType(num&0xff) - if kt > ktVal { - return nil, 0, 0, newErrIkeyCorrupted(ik, "invalid type") + seq, kt = uint64(num>>8), keyType(num&0xff) + if kt > keyTypeVal { + return nil, 0, 0, newErrInternalKeyCorrupted(ik, "invalid type") } ukey = ik[:len(ik)-8] return } -func validIkey(ik []byte) bool { - _, _, _, err := parseIkey(ik) +func 
validInternalKey(ik []byte) bool { + _, _, _, err := parseInternalKey(ik) return err == nil } -func (ik iKey) assert() { +func (ik internalKey) assert() { if ik == nil { - panic("leveldb: nil iKey") + panic("leveldb: nil internalKey") } if len(ik) < 8 { - panic(fmt.Sprintf("leveldb: iKey %q, len=%d: invalid length", []byte(ik), len(ik))) + panic(fmt.Sprintf("leveldb: internal key %q, len=%d: invalid length", []byte(ik), len(ik))) } } -func (ik iKey) ukey() []byte { +func (ik internalKey) ukey() []byte { ik.assert() return ik[:len(ik)-8] } -func (ik iKey) num() uint64 { +func (ik internalKey) num() uint64 { ik.assert() return binary.LittleEndian.Uint64(ik[len(ik)-8:]) } -func (ik iKey) parseNum() (seq uint64, kt kType) { +func (ik internalKey) parseNum() (seq uint64, kt keyType) { num := ik.num() - seq, kt = uint64(num>>8), kType(num&0xff) - if kt > ktVal { - panic(fmt.Sprintf("leveldb: iKey %q, len=%d: invalid type %#x", []byte(ik), len(ik), kt)) + seq, kt = uint64(num>>8), keyType(num&0xff) + if kt > keyTypeVal { + panic(fmt.Sprintf("leveldb: internal key %q, len=%d: invalid type %#x", []byte(ik), len(ik), kt)) } return } -func (ik iKey) String() string { +func (ik internalKey) String() string { if ik == nil { return "<nil>" } - if ukey, seq, kt, err := parseIkey(ik); err == nil { + if ukey, seq, kt, err := parseInternalKey(ik); err == nil { return fmt.Sprintf("%s,%s%d", shorten(string(ukey)), kt, seq) - } else { - return "<invalid>" } + return "<invalid>" } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session.go index 710b51ea7c636ea87d7e056d4e34e5408d3f1506..de984360bd6ca9c5af4eac3d361144b9fc03c445 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session.go @@ -18,6 +18,7 @@ import ( 
"git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/storage" ) +// ErrManifestCorrupted records manifest corruption. type ErrManifestCorrupted struct { Field string Reason string @@ -50,8 +51,8 @@ type session struct { manifestWriter storage.Writer manifestFd storage.FileDesc - stCompPtrs []iKey // compaction pointers; need external synchronization - stVersion *version // current version + stCompPtrs []internalKey // compaction pointers; need external synchronization + stVersion *version // current version vmu sync.Mutex } @@ -146,7 +147,7 @@ func (s *session) recover() (err error) { if err == nil { // save compact pointers for _, r := range rec.compPtrs { - s.setCompPtr(r.level, iKey(r.ikey)) + s.setCompPtr(r.level, internalKey(r.ikey)) } // commit record to version staging staging.commit(rec) @@ -154,9 +155,8 @@ func (s *session) recover() (err error) { err = errors.SetFd(err, fd) if strict || !errors.IsCorrupted(err) { return - } else { - s.logf("manifest error: %v (skipped)", errors.SetFd(err, fd)) } + s.logf("manifest error: %v (skipped)", errors.SetFd(err, fd)) } rec.resetCompPtrs() rec.resetAddedTables() diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_compaction.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_compaction.go index 401ab4a5170359ecb24c9c1c29041ae8d6eee0c8..b4fe7f4c6e8e63192082e733952efd42fd12eebd 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_compaction.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_compaction.go @@ -139,7 +139,7 @@ type compaction struct { gpi int seenKey bool gpOverlappedBytes int64 - imin, imax iKey + imin, imax internalKey tPtrs []int released bool @@ -242,7 +242,7 @@ func (c *compaction) baseLevelForKey(ukey []byte) bool { return true } -func (c *compaction) shouldStopBefore(ikey iKey) bool { +func (c *compaction) shouldStopBefore(ikey internalKey) bool { for ; c.gpi < 
len(c.gp); c.gpi++ { gp := c.gp[c.gpi] if c.s.icmp.Compare(ikey, gp.imax) <= 0 { diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record.go index 20a2a33bb78c3d4cb9a01007800deddd86a73d66..3496aa528986a2d812ad37e921d0c5f7808aa37f 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_record.go @@ -36,15 +36,15 @@ const ( type cpRecord struct { level int - ikey iKey + ikey internalKey } type atRecord struct { level int num int64 size int64 - imin iKey - imax iKey + imin internalKey + imax internalKey } type dtRecord struct { @@ -96,7 +96,7 @@ func (p *sessionRecord) setSeqNum(num uint64) { p.seqNum = num } -func (p *sessionRecord) addCompPtr(level int, ikey iKey) { +func (p *sessionRecord) addCompPtr(level int, ikey internalKey) { p.hasRec |= 1 << recCompPtr p.compPtrs = append(p.compPtrs, cpRecord{level, ikey}) } @@ -106,7 +106,7 @@ func (p *sessionRecord) resetCompPtrs() { p.compPtrs = p.compPtrs[:0] } -func (p *sessionRecord) addTable(level int, num, size int64, imin, imax iKey) { +func (p *sessionRecord) addTable(level int, num, size int64, imin, imax internalKey) { p.hasRec |= 1 << recAddTable p.addedTables = append(p.addedTables, atRecord{level, num, size, imin, imax}) } @@ -299,7 +299,7 @@ func (p *sessionRecord) decode(r io.Reader) error { level := p.readLevel("comp-ptr.level", br) ikey := p.readBytes("comp-ptr.ikey", br) if p.err == nil { - p.addCompPtr(level, iKey(ikey)) + p.addCompPtr(level, internalKey(ikey)) } case recAddTable: level := p.readLevel("add-table.level", br) diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_util.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_util.go index 19c243c97a673867e0ee36d4c09bbfbc052b7a7b..c70700afd9a6354a7ac31a3b48f0d049cf15ef92 100644 --- 
a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_util.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/session_util.go @@ -106,17 +106,17 @@ func (s *session) reuseFileNum(num int64) { } // Set compaction ptr at given level; need external synchronization. -func (s *session) setCompPtr(level int, ik iKey) { +func (s *session) setCompPtr(level int, ik internalKey) { if level >= len(s.stCompPtrs) { - newCompPtrs := make([]iKey, level+1) + newCompPtrs := make([]internalKey, level+1) copy(newCompPtrs, s.stCompPtrs) s.stCompPtrs = newCompPtrs } - s.stCompPtrs[level] = append(iKey{}, ik...) + s.stCompPtrs[level] = append(internalKey{}, ik...) } // Get compaction ptr at given level; need external synchronization. -func (s *session) getCompPtr(level int) iKey { +func (s *session) getCompPtr(level int) internalKey { if level >= len(s.stCompPtrs) { return nil } @@ -165,7 +165,7 @@ func (s *session) recordCommited(rec *sessionRecord) { } for _, r := range rec.compPtrs { - s.setCompPtr(r.level, iKey(r.ikey)) + s.setCompPtr(r.level, internalKey(r.ikey)) } } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table.go index cd9d2d681cbaeabfe6ac80382ea1341c93fbd05c..f68f6aca2bedc61bacc71c35f0af9ed27fb3391c 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table.go @@ -24,7 +24,7 @@ type tFile struct { fd storage.FileDesc seekLeft int32 size int64 - imin, imax iKey + imin, imax internalKey } // Returns true if given key is after largest key of this table. @@ -48,7 +48,7 @@ func (t *tFile) consumeSeek() int32 { } // Creates new tFile. 
-func newTableFile(fd storage.FileDesc, size int64, imin, imax iKey) *tFile { +func newTableFile(fd storage.FileDesc, size int64, imin, imax internalKey) *tFile { f := &tFile{ fd: fd, size: size, @@ -136,7 +136,7 @@ func (tf tFiles) size() (sum int64) { // Searches smallest index of tables whose its smallest // key is after or equal with given key. -func (tf tFiles) searchMin(icmp *iComparer, ikey iKey) int { +func (tf tFiles) searchMin(icmp *iComparer, ikey internalKey) int { return sort.Search(len(tf), func(i int) bool { return icmp.Compare(tf[i].imin, ikey) >= 0 }) @@ -144,7 +144,7 @@ func (tf tFiles) searchMin(icmp *iComparer, ikey iKey) int { // Searches smallest index of tables whose its largest // key is after or equal with given key. -func (tf tFiles) searchMax(icmp *iComparer, ikey iKey) int { +func (tf tFiles) searchMax(icmp *iComparer, ikey internalKey) int { return sort.Search(len(tf), func(i int) bool { return icmp.Compare(tf[i].imax, ikey) >= 0 }) @@ -166,7 +166,7 @@ func (tf tFiles) overlaps(icmp *iComparer, umin, umax []byte, unsorted bool) boo i := 0 if len(umin) > 0 { // Find the earliest possible internal key for min. - i = tf.searchMax(icmp, makeIkey(nil, umin, kMaxSeq, ktSeek)) + i = tf.searchMax(icmp, makeInternalKey(nil, umin, keyMaxSeq, keyTypeSeek)) } if i >= len(tf) { // Beginning of range is after all files, so no overlap. @@ -209,7 +209,7 @@ func (tf tFiles) getOverlaps(dst tFiles, icmp *iComparer, umin, umax []byte, ove } // Returns tables key range. 
-func (tf tFiles) getRange(icmp *iComparer) (imin, imax iKey) { +func (tf tFiles) getRange(icmp *iComparer) (imin, imax internalKey) { for i, t := range tf { if i == 0 { imin, imax = t.imin, t.imax @@ -231,10 +231,10 @@ func (tf tFiles) newIndexIterator(tops *tOps, icmp *iComparer, slice *util.Range if slice != nil { var start, limit int if slice.Start != nil { - start = tf.searchMax(icmp, iKey(slice.Start)) + start = tf.searchMax(icmp, internalKey(slice.Start)) } if slice.Limit != nil { - limit = tf.searchMin(icmp, iKey(slice.Limit)) + limit = tf.searchMin(icmp, internalKey(slice.Limit)) } else { limit = tf.Len() } @@ -259,7 +259,7 @@ type tFilesArrayIndexer struct { } func (a *tFilesArrayIndexer) Search(key []byte) int { - return a.searchMax(a.icmp, iKey(key)) + return a.searchMax(a.icmp, internalKey(key)) } func (a *tFilesArrayIndexer) Get(i int) iterator.Iterator { @@ -351,9 +351,9 @@ func (t *tOps) open(f *tFile) (ch *cache.Handle, err error) { return 0, nil } - var bcache *cache.CacheGetter + var bcache *cache.NamespaceGetter if t.bcache != nil { - bcache = &cache.CacheGetter{Cache: t.bcache, NS: uint64(f.fd.Num)} + bcache = &cache.NamespaceGetter{Cache: t.bcache, NS: uint64(f.fd.Num)} } var tr *table.Reader @@ -393,14 +393,13 @@ func (t *tOps) findKey(f *tFile, key []byte, ro *opt.ReadOptions) (rkey []byte, } // Returns approximate offset of the given key. -func (t *tOps) offsetOf(f *tFile, key []byte) (offset uint64, err error) { +func (t *tOps) offsetOf(f *tFile, key []byte) (offset int64, err error) { ch, err := t.open(f) if err != nil { return } defer ch.Release() - offset_, err := ch.Value().(*table.Reader).OffsetOf(key) - return uint64(offset_), err + return ch.Value().(*table.Reader).OffsetOf(key) } // Creates an iterator from the given table. 
@@ -515,7 +514,7 @@ func (w *tWriter) finish() (f *tFile, err error) { return } } - f = newTableFile(w.fd, int64(w.tw.BytesLen()), iKey(w.first), iKey(w.last)) + f = newTableFile(w.fd, int64(w.tw.BytesLen()), internalKey(w.first), internalKey(w.last)) return } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/reader.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/reader.go index 6becfb0817e03562d054b8b546d38291bd8135d2..bf40452af32efc72296c59cb42886744ff5a8dd7 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/reader.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/table/reader.go @@ -509,7 +509,7 @@ type Reader struct { mu sync.RWMutex fd storage.FileDesc reader io.ReaderAt - cache *cache.CacheGetter + cache *cache.NamespaceGetter err error bpool *util.BufferPool // Options @@ -988,7 +988,7 @@ func (r *Reader) Release() { // The fi, cache and bpool is optional and can be nil. // // The returned table reader instance is goroutine-safe. -func NewReader(f io.ReaderAt, size int64, fd storage.FileDesc, cache *cache.CacheGetter, bpool *util.BufferPool, o *opt.Options) (*Reader, error) { +func NewReader(f io.ReaderAt, size int64, fd storage.FileDesc, cache *cache.NamespaceGetter, bpool *util.BufferPool, o *opt.Options) (*Reader, error) { if f == nil { return nil, errors.New("leveldb/table: nil file") } diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/pool.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/pool.go deleted file mode 100644 index 1f7fdd41fe4edda0c6083fac96fa16d71ab80d59..0000000000000000000000000000000000000000 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/pool.go +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com> -// All rights reserved. -// -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -// +build go1.3 - -package util - -import ( - "sync" -) - -type Pool struct { - sync.Pool -} - -func NewPool(cap int) *Pool { - return &Pool{} -} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/pool_legacy.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/pool_legacy.go deleted file mode 100644 index 27b8d03be9477703757e0043fe70c28a05c27756..0000000000000000000000000000000000000000 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util/pool_legacy.go +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com> -// All rights reserved. -// -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// +build !go1.3 - -package util - -type Pool struct { - pool chan interface{} -} - -func (p *Pool) Get() interface{} { - select { - case x := <-p.pool: - return x - default: - return nil - } -} - -func (p *Pool) Put(x interface{}) { - select { - case p.pool <- x: - default: - } -} - -func NewPool(cap int) *Pool { - return &Pool{pool: make(chan interface{}, cap)} -} diff --git a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/version.go b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/version.go index 14338766b82a653063a545719f355f7f2428c8f4..6bea1c70d2223382a99453c2f7563d851e42cd3a 100644 --- a/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/version.go +++ b/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/version.go @@ -79,7 +79,7 @@ func (v *version) release() { v.s.vmu.Unlock() } -func (v *version) walkOverlapping(aux tFiles, ikey iKey, f func(level int, t *tFile) bool, lf func(level int) bool) { +func (v *version) walkOverlapping(aux tFiles, ikey internalKey, f func(level int, t *tFile) bool, lf func(level int) bool) { ukey := ikey.ukey() // Aux level. 
@@ -130,7 +130,7 @@ func (v *version) walkOverlapping(aux tFiles, ikey iKey, f func(level int, t *tF } } -func (v *version) get(aux tFiles, ikey iKey, ro *opt.ReadOptions, noValue bool) (value []byte, tcomp bool, err error) { +func (v *version) get(aux tFiles, ikey internalKey, ro *opt.ReadOptions, noValue bool) (value []byte, tcomp bool, err error) { ukey := ikey.ukey() var ( @@ -140,7 +140,7 @@ func (v *version) get(aux tFiles, ikey iKey, ro *opt.ReadOptions, noValue bool) // Level-0. zfound bool zseq uint64 - zkt kType + zkt keyType zval []byte ) @@ -176,7 +176,7 @@ func (v *version) get(aux tFiles, ikey iKey, ro *opt.ReadOptions, noValue bool) return false } - if fukey, fseq, fkt, fkerr := parseIkey(fikey); fkerr == nil { + if fukey, fseq, fkt, fkerr := parseInternalKey(fikey); fkerr == nil { if v.s.icmp.uCompare(ukey, fukey) == 0 { // Level <= 0 may overlaps each-other. if level <= 0 { @@ -188,12 +188,12 @@ func (v *version) get(aux tFiles, ikey iKey, ro *opt.ReadOptions, noValue bool) } } else { switch fkt { - case ktVal: + case keyTypeVal: value = fval err = nil - case ktDel: + case keyTypeDel: default: - panic("leveldb: invalid iKey type") + panic("leveldb: invalid internalKey type") } return false } @@ -207,12 +207,12 @@ func (v *version) get(aux tFiles, ikey iKey, ro *opt.ReadOptions, noValue bool) }, func(level int) bool { if zfound { switch zkt { - case ktVal: + case keyTypeVal: value = zval err = nil - case ktDel: + case keyTypeDel: default: - panic("leveldb: invalid iKey type") + panic("leveldb: invalid internalKey type") } return false } @@ -227,19 +227,18 @@ func (v *version) get(aux tFiles, ikey iKey, ro *opt.ReadOptions, noValue bool) return } -func (v *version) sampleSeek(ikey iKey) (tcomp bool) { +func (v *version) sampleSeek(ikey internalKey) (tcomp bool) { var tset *tSet v.walkOverlapping(nil, ikey, func(level int, t *tFile) bool { if tset == nil { tset = &tSet{level, t} return true - } else { - if tset.table.consumeSeek() <= 0 { - tcomp = 
atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset)) - } - return false } + if tset.table.consumeSeek() <= 0 { + tcomp = atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset)) + } + return false }, nil) return @@ -286,12 +285,12 @@ func (v *version) tLen(level int) int { return 0 } -func (v *version) offsetOf(ikey iKey) (n uint64, err error) { +func (v *version) offsetOf(ikey internalKey) (n int64, err error) { for level, tables := range v.levels { for _, t := range tables { if v.s.icmp.Compare(t.imax, ikey) <= 0 { // Entire file is before "ikey", so just add the file size - n += uint64(t.size) + n += t.size } else if v.s.icmp.Compare(t.imin, ikey) > 0 { // Entire file is after "ikey", so ignore if level > 0 { @@ -303,12 +302,11 @@ func (v *version) offsetOf(ikey iKey) (n uint64, err error) { } else { // "ikey" falls in the range for this table. Add the // approximate offset of "ikey" within the table. - var nn uint64 - nn, err = v.s.tops.offsetOf(t, ikey) - if err != nil { + if m, err := v.s.tops.offsetOf(t, ikey); err == nil { + n += m + } else { return 0, err } - n += nn } } } diff --git a/Godeps/_workspace/src/golang.org/x/text/encoding/japanese/shiftjis.go b/Godeps/_workspace/src/golang.org/x/text/encoding/japanese/shiftjis.go index d4753dba6e82b626dd76fdc3850219c3da6f093b..da181c96ca9373a352b0472802e83560d4d08ece 100644 --- a/Godeps/_workspace/src/golang.org/x/text/encoding/japanese/shiftjis.go +++ b/Godeps/_workspace/src/golang.org/x/text/encoding/japanese/shiftjis.go @@ -39,7 +39,7 @@ loop: case 0xa1 <= c0 && c0 < 0xe0: r, size = rune(c0)+(0xff61-0xa1), 1 - case (0x81 <= c0 && c0 < 0xa0) || (0xe0 <= c0 && c0 < 0xf0): + case (0x81 <= c0 && c0 < 0xa0) || (0xe0 <= c0 && c0 < 0xfd): if c0 <= 0x9f { c0 -= 0x70 } else {