Commit bc3555a5 authored by ale's avatar ale

switch from levigo (cgo) to goleveldb (native)

parent 56539527
{ {
"ImportPath": "git.autistici.org/ale/liber", "ImportPath": "git.autistici.org/ale/liber",
"GoVersion": "go1.5.3", "GoVersion": "go1.5.3",
"Packages": [
"."
],
"Deps": [ "Deps": [
{ {
"ImportPath": "github.com/blevesearch/bleve", "ImportPath": "github.com/blevesearch/bleve",
...@@ -24,6 +27,10 @@ ...@@ -24,6 +27,10 @@
"ImportPath": "github.com/golang/protobuf/proto", "ImportPath": "github.com/golang/protobuf/proto",
"Rev": "0dfe8f37844c14cb32c7247925270e0f7ba90973" "Rev": "0dfe8f37844c14cb32c7247925270e0f7ba90973"
}, },
{
"ImportPath": "github.com/golang/snappy",
"Rev": "c2359a1bd0bd4a2de4f1bd92ccd045fb60d0a994"
},
{ {
"ImportPath": "github.com/gorilla/context", "ImportPath": "github.com/gorilla/context",
"Rev": "14f550f51af52180c2eefed15e5fd18d63c0a64a" "Rev": "14f550f51af52180c2eefed15e5fd18d63c0a64a"
...@@ -32,10 +39,6 @@ ...@@ -32,10 +39,6 @@
"ImportPath": "github.com/gorilla/mux", "ImportPath": "github.com/gorilla/mux",
"Rev": "14cafe28513321476c73967a5a4f3454b6129c46" "Rev": "14cafe28513321476c73967a5a4f3454b6129c46"
}, },
{
"ImportPath": "github.com/jmhodges/levigo",
"Rev": "251e8f03dd397a865450836879aa350af87dd1de"
},
{ {
"ImportPath": "github.com/meskio/epubgo", "ImportPath": "github.com/meskio/epubgo",
"Rev": "815a36010f173a9ff64115e7ce4eafeed82fd404" "Rev": "815a36010f173a9ff64115e7ce4eafeed82fd404"
...@@ -48,6 +51,10 @@ ...@@ -48,6 +51,10 @@
"ImportPath": "github.com/steveyen/gtreap", "ImportPath": "github.com/steveyen/gtreap",
"Rev": "0abe01ef9be25c4aedc174758ec2d917314d6d70" "Rev": "0abe01ef9be25c4aedc174758ec2d917314d6d70"
}, },
{
"ImportPath": "github.com/syndtr/goleveldb/leveldb",
"Rev": "e7e6f5b5ef25adb580feac515f9ccec514d0bda8"
},
{ {
"ImportPath": "github.com/willf/bitset", "ImportPath": "github.com/willf/bitset",
"Comment": "v1.0.0-51-g9e6a3a7", "Comment": "v1.0.0-51-g9e6a3a7",
......
...@@ -11,7 +11,7 @@ package goleveldb ...@@ -11,7 +11,7 @@ package goleveldb
import ( import (
"git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store" "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store"
"github.com/syndtr/goleveldb/leveldb" "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb"
) )
type Batch struct { type Batch struct {
......
package goleveldb package goleveldb
import ( import (
"github.com/syndtr/goleveldb/leveldb/filter" "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter"
"github.com/syndtr/goleveldb/leveldb/opt" "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt"
) )
func applyConfig(o *opt.Options, config map[string]interface{}) (*opt.Options, error) { func applyConfig(o *opt.Options, config map[string]interface{}) (*opt.Options, error) {
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
package goleveldb package goleveldb
import "github.com/syndtr/goleveldb/leveldb/iterator" import "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator"
type Iterator struct { type Iterator struct {
store *Store store *Store
......
...@@ -11,8 +11,8 @@ package goleveldb ...@@ -11,8 +11,8 @@ package goleveldb
import ( import (
"git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store" "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store"
"github.com/syndtr/goleveldb/leveldb" "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb"
"github.com/syndtr/goleveldb/leveldb/util" "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util"
) )
type Reader struct { type Reader struct {
......
...@@ -14,8 +14,8 @@ import ( ...@@ -14,8 +14,8 @@ import (
"git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store" "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store"
"git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/registry" "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/registry"
"github.com/syndtr/goleveldb/leveldb" "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb"
"github.com/syndtr/goleveldb/leveldb/opt" "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt"
) )
const Name = "goleveldb" const Name = "goleveldb"
......
...@@ -13,7 +13,7 @@ import ( ...@@ -13,7 +13,7 @@ import (
"fmt" "fmt"
"git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store" "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store"
"github.com/syndtr/goleveldb/leveldb" "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb"
) )
type Writer struct { type Writer struct {
......
testdata/alice29.txt
testdata/asyoulik.txt
testdata/fireworks.jpeg
testdata/geo.protodata
testdata/html
testdata/html_x_4
testdata/kppkn.gtb
testdata/lcet10.txt
testdata/paper-100k.pdf
testdata/plrabn12.txt
testdata/urls.10K
# This is the official list of Snappy-Go authors for copyright purposes.
# This file is distinct from the CONTRIBUTORS files.
# See the latter for an explanation.
# Names should be added to this file as
# Name or Organization <email address>
# The email address is not required for organizations.
# Please keep the list sorted.
Damian Gryski <dgryski@gmail.com>
Google Inc.
Jan Mercl <0xjnml@gmail.com>
Rodolfo Carvalho <rhcarvalho@gmail.com>
Sebastien Binet <seb.binet@gmail.com>
# This is the official list of people who can contribute
# (and typically have contributed) code to the Snappy-Go repository.
# The AUTHORS file lists the copyright holders; this file
# lists people. For example, Google employees are listed here
# but not in AUTHORS, because Google holds the copyright.
#
# The submission process automatically checks to make sure
# that people submitting code are listed in this file (by email address).
#
# Names should be added to this file only after verifying that
# the individual or the individual's organization has agreed to
# the appropriate Contributor License Agreement, found here:
#
# http://code.google.com/legal/individual-cla-v1.0.html
# http://code.google.com/legal/corporate-cla-v1.0.html
#
# The agreement for individuals can be filled out on the web.
#
# When adding J Random Contributor's name to this file,
# either J's name or J's organization's name should be
# added to the AUTHORS file, depending on whether the
# individual or corporate CLA was used.
# Names should be added to this file like so:
# Name <email address>
# Please keep the list sorted.
Damian Gryski <dgryski@gmail.com>
Jan Mercl <0xjnml@gmail.com>
Kai Backman <kaib@golang.org>
Marc-Antoine Ruel <maruel@chromium.org>
Nigel Tao <nigeltao@golang.org>
Rob Pike <r@golang.org>
Rodolfo Carvalho <rhcarvalho@gmail.com>
Russ Cox <rsc@golang.org>
Sebastien Binet <seb.binet@gmail.com>
Copyright (c) 2011 The Snappy-Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
The Snappy compression format in the Go programming language.
To download and install from source:
$ go get github.com/golang/snappy
Unless otherwise noted, the Snappy-Go source files are distributed
under the BSD-style license found in the LICENSE file.
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package snappy
import (
"encoding/binary"
"errors"
"io"
)
var (
// ErrCorrupt reports that the input is invalid.
ErrCorrupt = errors.New("snappy: corrupt input")
// ErrTooLarge reports that the uncompressed length is too large.
ErrTooLarge = errors.New("snappy: decoded block is too large")
// ErrUnsupported reports that the input isn't supported.
ErrUnsupported = errors.New("snappy: unsupported input")
errUnsupportedCopy4Tag = errors.New("snappy: unsupported COPY_4 tag")
errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length")
)
// DecodedLen returns the length of the decoded block.
func DecodedLen(src []byte) (int, error) {
v, _, err := decodedLen(src)
return v, err
}
// decodedLen returns the length of the decoded block and the number of bytes
// that the length header occupied.
func decodedLen(src []byte) (blockLen, headerLen int, err error) {
v, n := binary.Uvarint(src)
if n <= 0 || v > 0xffffffff {
return 0, 0, ErrCorrupt
}
const wordSize = 32 << (^uint(0) >> 32 & 1)
if wordSize == 32 && v > 0x7fffffff {
return 0, 0, ErrTooLarge
}
return int(v), n, nil
}
// Decode returns the decoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire decoded block.
// Otherwise, a newly allocated slice will be returned.
// It is valid to pass a nil dst.
func Decode(dst, src []byte) ([]byte, error) {
dLen, s, err := decodedLen(src)
if err != nil {
return nil, err
}
if len(dst) < dLen {
dst = make([]byte, dLen)
}
var d, offset, length int
for s < len(src) {
switch src[s] & 0x03 {
case tagLiteral:
x := uint(src[s] >> 2)
switch {
case x < 60:
s++
case x == 60:
s += 2
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return nil, ErrCorrupt
}
x = uint(src[s-1])
case x == 61:
s += 3
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return nil, ErrCorrupt
}
x = uint(src[s-2]) | uint(src[s-1])<<8
case x == 62:
s += 4
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return nil, ErrCorrupt
}
x = uint(src[s-3]) | uint(src[s-2])<<8 | uint(src[s-1])<<16
case x == 63:
s += 5
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return nil, ErrCorrupt
}
x = uint(src[s-4]) | uint(src[s-3])<<8 | uint(src[s-2])<<16 | uint(src[s-1])<<24
}
length = int(x + 1)
if length <= 0 {
return nil, errUnsupportedLiteralLength
}
if length > len(dst)-d || length > len(src)-s {
return nil, ErrCorrupt
}
copy(dst[d:], src[s:s+length])
d += length
s += length
continue
case tagCopy1:
s += 2
if s > len(src) {
return nil, ErrCorrupt
}
length = 4 + int(src[s-2])>>2&0x7
offset = int(src[s-2])&0xe0<<3 | int(src[s-1])
case tagCopy2:
s += 3
if s > len(src) {
return nil, ErrCorrupt
}
length = 1 + int(src[s-3])>>2
offset = int(src[s-2]) | int(src[s-1])<<8
case tagCopy4:
return nil, errUnsupportedCopy4Tag
}
if offset <= 0 || d < offset || length > len(dst)-d {
return nil, ErrCorrupt
}
for end := d + length; d != end; d++ {
dst[d] = dst[d-offset]
}
}
if d != dLen {
return nil, ErrCorrupt
}
return dst[:d], nil
}
// NewReader returns a new Reader that decompresses from r, using the framing
// format described at
// https://github.com/google/snappy/blob/master/framing_format.txt
func NewReader(r io.Reader) *Reader {
return &Reader{
r: r,
decoded: make([]byte, maxUncompressedChunkLen),
buf: make([]byte, maxEncodedLenOfMaxUncompressedChunkLen+checksumSize),
}
}
// Reader is an io.Reader that can read Snappy-compressed bytes.
type Reader struct {
r io.Reader
err error
decoded []byte
buf []byte
// decoded[i:j] contains decoded bytes that have not yet been passed on.
i, j int
readHeader bool
}
// Reset discards any buffered data, resets all state, and switches the Snappy
// reader to read from r. This permits reusing a Reader rather than allocating
// a new one.
func (r *Reader) Reset(reader io.Reader) {
r.r = reader
r.err = nil
r.i = 0
r.j = 0
r.readHeader = false
}
func (r *Reader) readFull(p []byte) (ok bool) {
if _, r.err = io.ReadFull(r.r, p); r.err != nil {
if r.err == io.ErrUnexpectedEOF {
r.err = ErrCorrupt
}
return false
}
return true
}
// Read satisfies the io.Reader interface.
func (r *Reader) Read(p []byte) (int, error) {
if r.err != nil {
return 0, r.err
}
for {
if r.i < r.j {
n := copy(p, r.decoded[r.i:r.j])
r.i += n
return n, nil
}
if !r.readFull(r.buf[:4]) {
return 0, r.err
}
chunkType := r.buf[0]
if !r.readHeader {
if chunkType != chunkTypeStreamIdentifier {
r.err = ErrCorrupt
return 0, r.err
}
r.readHeader = true
}
chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16
if chunkLen > len(r.buf) {
r.err = ErrUnsupported
return 0, r.err
}
// The chunk types are specified at
// https://github.com/google/snappy/blob/master/framing_format.txt
switch chunkType {
case chunkTypeCompressedData:
// Section 4.2. Compressed data (chunk type 0x00).
if chunkLen < checksumSize {
r.err = ErrCorrupt
return 0, r.err
}
buf := r.buf[:chunkLen]
if !r.readFull(buf) {
return 0, r.err
}
checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
buf = buf[checksumSize:]
n, err := DecodedLen(buf)
if err != nil {
r.err = err
return 0, r.err
}
if n > len(r.decoded) {
r.err = ErrCorrupt
return 0, r.err
}
if _, err := Decode(r.decoded, buf); err != nil {
r.err = err
return 0, r.err
}
if crc(r.decoded[:n]) != checksum {
r.err = ErrCorrupt
return 0, r.err
}
r.i, r.j = 0, n
continue
case chunkTypeUncompressedData:
// Section 4.3. Uncompressed data (chunk type 0x01).
if chunkLen < checksumSize {
r.err = ErrCorrupt
return 0, r.err
}
buf := r.buf[:checksumSize]
if !r.readFull(buf) {
return 0, r.err
}
checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
// Read directly into r.decoded instead of via r.buf.
n := chunkLen - checksumSize
if !r.readFull(r.decoded[:n]) {
return 0, r.err
}
if crc(r.decoded[:n]) != checksum {
r.err = ErrCorrupt
return 0, r.err
}
r.i, r.j = 0, n
continue
case chunkTypeStreamIdentifier:
// Section 4.1. Stream identifier (chunk type 0xff).
if chunkLen != len(magicBody) {
r.err = ErrCorrupt
return 0, r.err
}
if !r.readFull(r.buf[:len(magicBody)]) {
return 0, r.err
}
for i := 0; i < len(magicBody); i++ {
if r.buf[i] != magicBody[i] {
r.err = ErrCorrupt
return 0, r.err
}
}
continue
}
if chunkType <= 0x7f {
// Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
r.err = ErrUnsupported
return 0, r.err
}
// Section 4.4 Padding (chunk type 0xfe).
// Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
if !r.readFull(r.buf[:chunkLen]) {
return 0, r.err
}
}
}
This diff is collapsed.
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package snappy implements the snappy block-based compression format.
// It aims for very high speeds and reasonable compression.
//
// The C++ snappy implementation is at https://github.com/google/snappy
package snappy
import (
"hash/crc32"
)
/*
Each encoded block begins with the varint-encoded length of the decoded data,
followed by a sequence of chunks. Chunks begin and end on byte boundaries. The
first byte of each chunk is broken into its 2 least and 6 most significant bits
called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag.
Zero means a literal tag. All other values mean a copy tag.
For literal tags:
- If m < 60, the next 1 + m bytes are literal bytes.
- Otherwise, let n be the little-endian unsigned integer denoted by the next
m - 59 bytes. The next 1 + n bytes after that are literal bytes.
For copy tags, length bytes are copied from offset bytes ago, in the style of
Lempel-Ziv compression algorithms. In particular:
- For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12).
The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10
of the offset. The next byte is bits 0-7 of the offset.
- For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65).
The length is 1 + m. The offset is the little-endian unsigned integer
denoted by the next 2 bytes.
- For l == 3, this tag is a legacy format that is no longer supported.
*/
const (
tagLiteral = 0x00
tagCopy1 = 0x01
tagCopy2 = 0x02
tagCopy4 = 0x03
)
const (
checksumSize = 4
chunkHeaderSize = 4
magicChunk = "\xff\x06\x00\x00" + magicBody
magicBody = "sNaPpY"
// https://github.com/google/snappy/blob/master/framing_format.txt says
// that "the uncompressed data in a chunk must be no longer than 65536 bytes".
maxUncompressedChunkLen = 65536
// maxEncodedLenOfMaxUncompressedChunkLen equals
// MaxEncodedLen(maxUncompressedChunkLen), but is hard coded to be a const
// instead of a variable, so that obufLen can also be a const. Their
// equivalence is confirmed by TestMaxEncodedLenOfMaxUncompressedChunkLen.
maxEncodedLenOfMaxUncompressedChunkLen = 76490
obufHeaderLen = len(magicChunk) + checksumSize + chunkHeaderSize
obufLen = obufHeaderLen + maxEncodedLenOfMaxUncompressedChunkLen
)
const (
chunkTypeCompressedData = 0x00
chunkTypeUncompressedData = 0x01
chunkTypePadding = 0xfe
chunkTypeStreamIdentifier = 0xff
)
var crcTable = crc32.MakeTable(crc32.Castagnoli)
// crc implements the checksum specified in section 3 of
// https://github.com/google/snappy/blob/master/framing_format.txt