Commit bc3555a5 authored by ale's avatar ale

switch from levigo (cgo) to goleveldb (native)

parent 56539527
{
"ImportPath": "git.autistici.org/ale/liber",
"GoVersion": "go1.5.3",
"Packages": [
"."
],
"Deps": [
{
"ImportPath": "github.com/blevesearch/bleve",
......@@ -24,6 +27,10 @@
"ImportPath": "github.com/golang/protobuf/proto",
"Rev": "0dfe8f37844c14cb32c7247925270e0f7ba90973"
},
{
"ImportPath": "github.com/golang/snappy",
"Rev": "c2359a1bd0bd4a2de4f1bd92ccd045fb60d0a994"
},
{
"ImportPath": "github.com/gorilla/context",
"Rev": "14f550f51af52180c2eefed15e5fd18d63c0a64a"
......@@ -32,10 +39,6 @@
"ImportPath": "github.com/gorilla/mux",
"Rev": "14cafe28513321476c73967a5a4f3454b6129c46"
},
{
"ImportPath": "github.com/jmhodges/levigo",
"Rev": "251e8f03dd397a865450836879aa350af87dd1de"
},
{
"ImportPath": "github.com/meskio/epubgo",
"Rev": "815a36010f173a9ff64115e7ce4eafeed82fd404"
......@@ -48,6 +51,10 @@
"ImportPath": "github.com/steveyen/gtreap",
"Rev": "0abe01ef9be25c4aedc174758ec2d917314d6d70"
},
{
"ImportPath": "github.com/syndtr/goleveldb/leveldb",
"Rev": "e7e6f5b5ef25adb580feac515f9ccec514d0bda8"
},
{
"ImportPath": "github.com/willf/bitset",
"Comment": "v1.0.0-51-g9e6a3a7",
......
......@@ -11,7 +11,7 @@ package goleveldb
import (
"git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store"
"github.com/syndtr/goleveldb/leveldb"
"git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb"
)
type Batch struct {
......
package goleveldb
import (
"github.com/syndtr/goleveldb/leveldb/filter"
"github.com/syndtr/goleveldb/leveldb/opt"
"git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/filter"
"git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt"
)
func applyConfig(o *opt.Options, config map[string]interface{}) (*opt.Options, error) {
......
......@@ -9,7 +9,7 @@
package goleveldb
import "github.com/syndtr/goleveldb/leveldb/iterator"
import "git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/iterator"
type Iterator struct {
store *Store
......
......@@ -11,8 +11,8 @@ package goleveldb
import (
"git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store"
"github.com/syndtr/goleveldb/leveldb"
"github.com/syndtr/goleveldb/leveldb/util"
"git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb"
"git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/util"
)
type Reader struct {
......
......@@ -14,8 +14,8 @@ import (
"git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store"
"git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/registry"
"github.com/syndtr/goleveldb/leveldb"
"github.com/syndtr/goleveldb/leveldb/opt"
"git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb"
"git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb/opt"
)
const Name = "goleveldb"
......
......@@ -13,7 +13,7 @@ import (
"fmt"
"git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/blevesearch/bleve/index/store"
"github.com/syndtr/goleveldb/leveldb"
"git.autistici.org/ale/liber/Godeps/_workspace/src/github.com/syndtr/goleveldb/leveldb"
)
type Writer struct {
......
testdata/alice29.txt
testdata/asyoulik.txt
testdata/fireworks.jpeg
testdata/geo.protodata
testdata/html
testdata/html_x_4
testdata/kppkn.gtb
testdata/lcet10.txt
testdata/paper-100k.pdf
testdata/plrabn12.txt
testdata/urls.10K
# This is the official list of Snappy-Go authors for copyright purposes.
# This file is distinct from the CONTRIBUTORS files.
# See the latter for an explanation.
# Names should be added to this file as
# Name or Organization <email address>
# The email address is not required for organizations.
# Please keep the list sorted.
Damian Gryski <dgryski@gmail.com>
Google Inc.
Jan Mercl <0xjnml@gmail.com>
Rodolfo Carvalho <rhcarvalho@gmail.com>
Sebastien Binet <seb.binet@gmail.com>
# This is the official list of people who can contribute
# (and typically have contributed) code to the Snappy-Go repository.
# The AUTHORS file lists the copyright holders; this file
# lists people. For example, Google employees are listed here
# but not in AUTHORS, because Google holds the copyright.
#
# The submission process automatically checks to make sure
# that people submitting code are listed in this file (by email address).
#
# Names should be added to this file only after verifying that
# the individual or the individual's organization has agreed to
# the appropriate Contributor License Agreement, found here:
#
# http://code.google.com/legal/individual-cla-v1.0.html
# http://code.google.com/legal/corporate-cla-v1.0.html
#
# The agreement for individuals can be filled out on the web.
#
# When adding J Random Contributor's name to this file,
# either J's name or J's organization's name should be
# added to the AUTHORS file, depending on whether the
# individual or corporate CLA was used.
# Names should be added to this file like so:
# Name <email address>
# Please keep the list sorted.
Damian Gryski <dgryski@gmail.com>
Jan Mercl <0xjnml@gmail.com>
Kai Backman <kaib@golang.org>
Marc-Antoine Ruel <maruel@chromium.org>
Nigel Tao <nigeltao@golang.org>
Rob Pike <r@golang.org>
Rodolfo Carvalho <rhcarvalho@gmail.com>
Russ Cox <rsc@golang.org>
Sebastien Binet <seb.binet@gmail.com>
Copyright (c) 2011 The Snappy-Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
The Snappy compression format in the Go programming language.
To download and install from source:
$ go get github.com/golang/snappy
Unless otherwise noted, the Snappy-Go source files are distributed
under the BSD-style license found in the LICENSE file.
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package snappy
import (
"encoding/binary"
"errors"
"io"
)
var (
// ErrCorrupt reports that the input is invalid.
ErrCorrupt = errors.New("snappy: corrupt input")
// ErrTooLarge reports that the uncompressed length is too large.
ErrTooLarge = errors.New("snappy: decoded block is too large")
// ErrUnsupported reports that the input isn't supported.
ErrUnsupported = errors.New("snappy: unsupported input")
errUnsupportedCopy4Tag = errors.New("snappy: unsupported COPY_4 tag")
errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length")
)
// DecodedLen returns the length of the decoded block.
func DecodedLen(src []byte) (int, error) {
v, _, err := decodedLen(src)
return v, err
}
// decodedLen returns the length of the decoded block and the number of bytes
// that the length header occupied.
func decodedLen(src []byte) (blockLen, headerLen int, err error) {
v, n := binary.Uvarint(src)
if n <= 0 || v > 0xffffffff {
return 0, 0, ErrCorrupt
}
const wordSize = 32 << (^uint(0) >> 32 & 1)
if wordSize == 32 && v > 0x7fffffff {
return 0, 0, ErrTooLarge
}
return int(v), n, nil
}
// Decode returns the decoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire decoded block.
// Otherwise, a newly allocated slice will be returned.
// It is valid to pass a nil dst.
func Decode(dst, src []byte) ([]byte, error) {
dLen, s, err := decodedLen(src)
if err != nil {
return nil, err
}
if len(dst) < dLen {
dst = make([]byte, dLen)
}
var d, offset, length int
for s < len(src) {
switch src[s] & 0x03 {
case tagLiteral:
x := uint(src[s] >> 2)
switch {
case x < 60:
s++
case x == 60:
s += 2
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return nil, ErrCorrupt
}
x = uint(src[s-1])
case x == 61:
s += 3
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return nil, ErrCorrupt
}
x = uint(src[s-2]) | uint(src[s-1])<<8
case x == 62:
s += 4
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return nil, ErrCorrupt
}
x = uint(src[s-3]) | uint(src[s-2])<<8 | uint(src[s-1])<<16
case x == 63:
s += 5
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return nil, ErrCorrupt
}
x = uint(src[s-4]) | uint(src[s-3])<<8 | uint(src[s-2])<<16 | uint(src[s-1])<<24
}
length = int(x + 1)
if length <= 0 {
return nil, errUnsupportedLiteralLength
}
if length > len(dst)-d || length > len(src)-s {
return nil, ErrCorrupt
}
copy(dst[d:], src[s:s+length])
d += length
s += length
continue
case tagCopy1:
s += 2
if s > len(src) {
return nil, ErrCorrupt
}
length = 4 + int(src[s-2])>>2&0x7
offset = int(src[s-2])&0xe0<<3 | int(src[s-1])
case tagCopy2:
s += 3
if s > len(src) {
return nil, ErrCorrupt
}
length = 1 + int(src[s-3])>>2
offset = int(src[s-2]) | int(src[s-1])<<8
case tagCopy4:
return nil, errUnsupportedCopy4Tag
}
if offset <= 0 || d < offset || length > len(dst)-d {
return nil, ErrCorrupt
}
for end := d + length; d != end; d++ {
dst[d] = dst[d-offset]
}
}
if d != dLen {
return nil, ErrCorrupt
}
return dst[:d], nil
}
// NewReader returns a new Reader that decompresses from r, using the framing
// format described at
// https://github.com/google/snappy/blob/master/framing_format.txt
func NewReader(r io.Reader) *Reader {
return &Reader{
r: r,
decoded: make([]byte, maxUncompressedChunkLen),
buf: make([]byte, maxEncodedLenOfMaxUncompressedChunkLen+checksumSize),
}
}
// Reader is an io.Reader that can read Snappy-compressed bytes.
type Reader struct {
r io.Reader
err error
decoded []byte
buf []byte
// decoded[i:j] contains decoded bytes that have not yet been passed on.
i, j int
readHeader bool
}
// Reset discards any buffered data, resets all state, and switches the Snappy
// reader to read from r. This permits reusing a Reader rather than allocating
// a new one.
func (r *Reader) Reset(reader io.Reader) {
r.r = reader
r.err = nil
r.i = 0
r.j = 0
r.readHeader = false
}
func (r *Reader) readFull(p []byte) (ok bool) {
if _, r.err = io.ReadFull(r.r, p); r.err != nil {
if r.err == io.ErrUnexpectedEOF {
r.err = ErrCorrupt
}
return false
}
return true
}
// Read satisfies the io.Reader interface.
func (r *Reader) Read(p []byte) (int, error) {
if r.err != nil {
return 0, r.err
}
for {
if r.i < r.j {
n := copy(p, r.decoded[r.i:r.j])
r.i += n
return n, nil
}
if !r.readFull(r.buf[:4]) {
return 0, r.err
}
chunkType := r.buf[0]
if !r.readHeader {
if chunkType != chunkTypeStreamIdentifier {
r.err = ErrCorrupt
return 0, r.err
}
r.readHeader = true
}
chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16
if chunkLen > len(r.buf) {
r.err = ErrUnsupported
return 0, r.err
}
// The chunk types are specified at
// https://github.com/google/snappy/blob/master/framing_format.txt
switch chunkType {
case chunkTypeCompressedData:
// Section 4.2. Compressed data (chunk type 0x00).
if chunkLen < checksumSize {
r.err = ErrCorrupt
return 0, r.err
}
buf := r.buf[:chunkLen]
if !r.readFull(buf) {
return 0, r.err
}
checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
buf = buf[checksumSize:]
n, err := DecodedLen(buf)
if err != nil {
r.err = err
return 0, r.err
}
if n > len(r.decoded) {
r.err = ErrCorrupt
return 0, r.err
}
if _, err := Decode(r.decoded, buf); err != nil {
r.err = err
return 0, r.err
}
if crc(r.decoded[:n]) != checksum {
r.err = ErrCorrupt
return 0, r.err
}
r.i, r.j = 0, n
continue
case chunkTypeUncompressedData:
// Section 4.3. Uncompressed data (chunk type 0x01).
if chunkLen < checksumSize {
r.err = ErrCorrupt
return 0, r.err
}
buf := r.buf[:checksumSize]
if !r.readFull(buf) {
return 0, r.err
}
checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
// Read directly into r.decoded instead of via r.buf.
n := chunkLen - checksumSize
if !r.readFull(r.decoded[:n]) {
return 0, r.err
}
if crc(r.decoded[:n]) != checksum {
r.err = ErrCorrupt
return 0, r.err
}
r.i, r.j = 0, n
continue
case chunkTypeStreamIdentifier:
// Section 4.1. Stream identifier (chunk type 0xff).
if chunkLen != len(magicBody) {
r.err = ErrCorrupt
return 0, r.err
}
if !r.readFull(r.buf[:len(magicBody)]) {
return 0, r.err
}
for i := 0; i < len(magicBody); i++ {
if r.buf[i] != magicBody[i] {
r.err = ErrCorrupt
return 0, r.err
}
}
continue
}
if chunkType <= 0x7f {
// Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
r.err = ErrUnsupported
return 0, r.err
}
// Section 4.4 Padding (chunk type 0xfe).
// Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
if !r.readFull(r.buf[:chunkLen]) {
return 0, r.err
}
}
}
This diff is collapsed.
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package snappy implements the snappy block-based compression format.
// It aims for very high speeds and reasonable compression.
//
// The C++ snappy implementation is at https://github.com/google/snappy
package snappy
import (
"hash/crc32"
)
/*
Each encoded block begins with the varint-encoded length of the decoded data,
followed by a sequence of chunks. Chunks begin and end on byte boundaries. The
first byte of each chunk is broken into its 2 least and 6 most significant bits
called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag.
Zero means a literal tag. All other values mean a copy tag.
For literal tags:
- If m < 60, the next 1 + m bytes are literal bytes.
- Otherwise, let n be the little-endian unsigned integer denoted by the next
m - 59 bytes. The next 1 + n bytes after that are literal bytes.
For copy tags, length bytes are copied from offset bytes ago, in the style of
Lempel-Ziv compression algorithms. In particular:
- For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12).
The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10
of the offset. The next byte is bits 0-7 of the offset.
- For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65).
The length is 1 + m. The offset is the little-endian unsigned integer
denoted by the next 2 bytes.
- For l == 3, this tag is a legacy format that is no longer supported.
*/
const (
tagLiteral = 0x00
tagCopy1 = 0x01
tagCopy2 = 0x02
tagCopy4 = 0x03
)
const (
checksumSize = 4
chunkHeaderSize = 4
magicChunk = "\xff\x06\x00\x00" + magicBody
magicBody = "sNaPpY"
// https://github.com/google/snappy/blob/master/framing_format.txt says
// that "the uncompressed data in a chunk must be no longer than 65536 bytes".
maxUncompressedChunkLen = 65536
// maxEncodedLenOfMaxUncompressedChunkLen equals
// MaxEncodedLen(maxUncompressedChunkLen), but is hard coded to be a const
// instead of a variable, so that obufLen can also be a const. Their
// equivalence is confirmed by TestMaxEncodedLenOfMaxUncompressedChunkLen.
maxEncodedLenOfMaxUncompressedChunkLen = 76490
obufHeaderLen = len(magicChunk) + checksumSize + chunkHeaderSize
obufLen = obufHeaderLen + maxEncodedLenOfMaxUncompressedChunkLen
)
const (
chunkTypeCompressedData = 0x00
chunkTypeUncompressedData = 0x01
chunkTypePadding = 0xfe
chunkTypeStreamIdentifier = 0xff
)
var crcTable = crc32.MakeTable(crc32.Castagnoli)
// crc implements the checksum specified in section 3 of
// https://github.com/google/snappy/blob/master/framing_format.txt
func crc(b []byte) uint32 {
c := crc32.Update(0, crcTable, b)
return uint32(c>>15|c<<17) + 0xa282ead8
}
Copyright (c) 2012 Jeffrey M Hodges
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# levigo
levigo is a Go wrapper for LevelDB.
The API has been godoc'ed and [is available on the
web](http://godoc.org/github.com/jmhodges/levigo).
Questions answered at `golang-nuts@googlegroups.com`.
## Building
You'll need the shared library build of
[LevelDB](http://code.google.com/p/leveldb/) installed on your machine. The
current LevelDB will build it by default.
The minimum version of LevelDB required is currently 1.6. If you require the
use of an older version of LevelDB, see the [fork of levigo for LevelDB
1.4](https://github.com/jmhodges/levigo_leveldb_1.4). Prefer putting in the
work to be up to date as LevelDB moves very quickly.
Now, if you build LevelDB and put the shared library and headers in one of the
standard places for your OS, you'll be able to simply run:
go get github.com/jmhodges/levigo
But, suppose you put the shared LevelDB library somewhere weird like
/path/to/lib and the headers were installed in /path/to/include. To install
levigo remotely, you'll run:
CGO_CFLAGS="-I/path/to/leveldb/include" CGO_LDFLAGS="-L/path/to/leveldb/lib" go get github.com/jmhodges/levigo
and there you go.
In order to build with snappy, you'll have to explicitly add "-lsnappy" to the
`CGO_LDFLAGS`. Supposing that both snappy and leveldb are in weird places,
you'll run something like:
CGO_CFLAGS="-I/path/to/leveldb/include -I/path/to/snappy/include"
CGO_LDFLAGS="-L/path/to/leveldb/lib -L/path/to/snappy/lib -lsnappy" go get github.com/jmhodges/levigo
(and make sure the -lsnappy is after the snappy library path!).
Of course, these same rules apply when doing `go build`, as well.
## Caveats
Comparators and WriteBatch iterators must be written in C in your own
library. This seems like a pain in the ass, but remember that you'll have the
LevelDB C API available to your in your client package when you import levigo.
An example of writing your own Comparator can be found in
<https://github.com/jmhodges/levigo/blob/master/examples>.
package levigo
// #cgo LDFLAGS: -lleveldb
// #include "leveldb/c.h"
import "C"
import (
"unsafe"
)
// WriteBatch is a batching of Puts, and Deletes to be written atomically to a
// database. A WriteBatch is written when passed to DB.Write.
//
// To prevent memory leaks, call Close when the program no longer needs the
// WriteBatch object.