Commit 42be2b91 authored by ale's avatar ale

Update dependencies

parent c48112d1
Pipeline #1284 passed with stages
in 1 minute and 30 seconds
......@@ -7,4 +7,5 @@ Bob Potter (@bpot),
Tyson Maly (@tvmaly),
Will Glynn (@willglynn),
Brent Pedersen (@brentp)
Maciej Biłas (@maciej)
Maciej Biłas (@maciej),
Joe Nall (@joenall)
......@@ -7,5 +7,9 @@ Bob Potter (@bpot),
Tyson Maly (@tvmaly),
Will Glynn (@willglynn),
Brent Pedersen (@brentp),
Jason E. Aten (@glycerine)
Vali Malinoiu (@0x4139)
Jason E. Aten (@glycerine),
Vali Malinoiu (@0x4139),
Forud Ghafouri (@fzerorubigd),
Joe Nall (@joenall),
(@fredim),
Edd Robinson (@e-dard)
.PHONY: help all test format fmtcheck vet lint qa deps clean nuke rle backrle ser fetch-real-roaring-datasets
.PHONY: help all test format fmtcheck vet lint qa deps clean nuke ser fetch-real-roaring-datasets
......@@ -24,7 +24,9 @@ help:
@echo " make clean : Remove any build artifact"
@echo " make nuke : Deletes any intermediate file"
@echo ""
@echo " make fuzz : Fuzzy testing"
@echo " make fuzz-smat : Fuzzy testing with smat"
@echo " make fuzz-stream : Fuzzy testing with stream deserialization"
@echo " make fuzz-buffer : Fuzzy testing with buffer deserialization"
@echo ""
# Alias for help target
......@@ -71,9 +73,19 @@ deps:
GOPATH=$(GOPATH) go get github.com/philhofer/fwd
GOPATH=$(GOPATH) go get github.com/jtolds/gls
fuzz:
fuzz-smat:
go test -tags=gofuzz -run=TestGenerateSmatCorpus
go-fuzz-build github.com/RoaringBitmap/roaring
go-fuzz-build -func FuzzSmat github.com/RoaringBitmap/roaring
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
fuzz-stream:
go-fuzz-build -func FuzzSerializationStream github.com/RoaringBitmap/roaring
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
fuzz-buffer:
go-fuzz-build -func FuzzSerializationBuffer github.com/RoaringBitmap/roaring
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
# Remove any build artifact
......@@ -85,18 +97,8 @@ nuke:
rm -rf ./target
GOPATH=$(GOPATH) go clean -i ./...
rle:
cp rle.go rle16.go
perl -pi -e 's/32/16/g' rle16.go
cp rle_test.go rle16_test.go
perl -pi -e 's/32/16/g' rle16_test.go
backrle:
cp rle16.go rle.go
perl -pi -e 's/16/32/g' rle.go
perl -pi -e 's/2032/2016/g' rle.go
ser: rle
ser:
go generate
cover:
......
roaring [![Build Status](https://travis-ci.org/RoaringBitmap/roaring.png)](https://travis-ci.org/RoaringBitmap/roaring) [![Coverage Status](https://coveralls.io/repos/github/RoaringBitmap/roaring/badge.svg?branch=master)](https://coveralls.io/github/RoaringBitmap/roaring?branch=master) [![GoDoc](https://godoc.org/github.com/RoaringBitmap/roaring?status.svg)](https://godoc.org/github.com/RoaringBitmap/roaring) [![Go Report Card](https://goreportcard.com/badge/RoaringBitmap/roaring)](https://goreportcard.com/report/github.com/RoaringBitmap/roaring)
=============
This is a go port of the Roaring bitmap data structure.
This is a go version of the Roaring bitmap data structure.
Roaring bitmaps are used by several major systems such as [Apache Lucene][lucene] and derivative systems such as [Solr][solr] and
......@@ -29,6 +30,12 @@ Roaring bitmaps are found to work well in many important applications:
The ``roaring`` Go library is used by
* [Cloud Torrent](https://github.com/jpillora/cloud-torrent): a self-hosted remote torrent client
* [runv](https://github.com/hyperhq/runv): an Hypervisor-based runtime for the Open Containers Initiative
* [InfluxDB](https://www.influxdata.com)
* [Pilosa](https://www.pilosa.com/)
* [Bleve](http://www.blevesearch.com)
This library is used in production in several systems, it is part of the [Awesome Go collection](https://awesome-go.com).
There are also [Java](https://github.com/RoaringBitmap/RoaringBitmap) and [C/C++](https://github.com/RoaringBitmap/CRoaring) versions. The Java, C, C++ and Go version are binary compatible: e.g, you can save bitmaps
from a Java program and load them back in Go, and vice versa. We have a [format specification](https://github.com/RoaringBitmap/RoaringFormatSpec).
......@@ -36,17 +43,17 @@ from a Java program and load them back in Go, and vice versa. We have a [format
This code is licensed under Apache License, Version 2.0 (ASL2.0).
Copyright 2016 by the authors.
Copyright 2016-... by the authors.
### References
- Daniel Lemire, Owen Kaser, Nathan Kurz, Luca Deri, Chris O'Hara, François Saint-Jacques, Gregory Ssi-Yan-Kai, Roaring Bitmaps: Implementation of an Optimized Software Library [arXiv:1709.07821](https://arxiv.org/abs/1709.07821)
- Daniel Lemire, Owen Kaser, Nathan Kurz, Luca Deri, Chris O'Hara, François Saint-Jacques, Gregory Ssi-Yan-Kai, Roaring Bitmaps: Implementation of an Optimized Software Library, Software: Practice and Experience 48 (4), 2018 [arXiv:1709.07821](https://arxiv.org/abs/1709.07821)
- Samy Chambi, Daniel Lemire, Owen Kaser, Robert Godin,
Better bitmap performance with Roaring bitmaps,
Software: Practice and Experience Volume 46, Issue 5, pages 709–719, May 2016
Software: Practice and Experience 46 (5), 2016.
http://arxiv.org/abs/1402.6407 This paper used data from http://lemire.me/data/realroaring2014.html
- Daniel Lemire, Gregory Ssi-Yan-Kai, Owen Kaser, Consistently faster and smaller compressed bitmaps with Roaring, Software: Practice and Experience (accepted in 2016, to appear) http://arxiv.org/abs/1603.06549
- Daniel Lemire, Gregory Ssi-Yan-Kai, Owen Kaser, Consistently faster and smaller compressed bitmaps with Roaring, Software: Practice and Experience 46 (11), 2016. http://arxiv.org/abs/1603.06549
### Dependencies
......@@ -91,7 +98,7 @@ func main() {
rb2 := roaring.BitmapOf(3, 4, 1000)
fmt.Println(rb2.String())
rb3 := roaring.NewBitmap()
rb3 := roaring.New()
fmt.Println(rb3.String())
fmt.Println("Cardinality: ", rb1.GetCardinality())
......@@ -106,9 +113,9 @@ func main() {
rb3.Or(rb1)
// computes union of the three bitmaps in parallel using 4 workers
ParOr(4, rb1, rb2, rb3)
roaring.ParOr(4, rb1, rb2, rb3)
// computes intersection of the three bitmaps in parallel using 4 workers
ParAnd(4, rb1, rb2, rb3)
roaring.ParAnd(4, rb1, rb2, rb3)
// prints 1, 3, 4, 5, 1000
......@@ -121,11 +128,12 @@ func main() {
// next we include an example of serialization
buf := new(bytes.Buffer)
rb1.WriteTo(buf) // we omit error handling
newrb:= roaring.NewBitmap()
newrb:= roaring.New()
newrb.ReadFrom(buf)
if rb1.Equals(newrb) {
fmt.Println("I wrote the content to a byte stream and read it back.")
}
// you can iterate over bitmaps using ReverseIterator(), Iterator, ManyIterator()
}
```
......@@ -139,7 +147,7 @@ consider the following sample of code:
if err != nil {
t.Errorf("Failed writing")
}
newrb:= NewBitmap()
newrb:= New()
size,err=newrb.ReadFrom(buf)
if err != nil {
t.Errorf("Failed reading")
......@@ -184,6 +192,14 @@ https://coveralls.io/github/RoaringBitmap/roaring?branch=master
Type
go test -bench Benchmark -run -
To run benchmarks on [Real Roaring Datasets](https://github.com/RoaringBitmap/real-roaring-datasets)
run the following:
```sh
go get github.com/RoaringBitmap/real-roaring-datasets
BENCH_REAL_DATA=1 go test -bench BenchmarkRealData -run -
```
### Iterative use
......
......@@ -28,6 +28,14 @@ func (ac *arrayContainer) getShortIterator() shortIterable {
return &shortIterator{ac.content, 0}
}
func (ac *arrayContainer) getReverseIterator() shortIterable {
return &reverseIterator{ac.content, len(ac.content) - 1}
}
func (ac *arrayContainer) getManyIterator() manyIterable {
return &manyIterator{ac.content, 0}
}
func (ac *arrayContainer) minimum() uint16 {
return ac.content[0] // assume not empty
}
......@@ -111,7 +119,6 @@ func (ac *arrayContainer) iremoveRange(firstOfRange, endx int) container {
// flip the values in the range [firstOfRange,endx)
func (ac *arrayContainer) not(firstOfRange, endx int) container {
if firstOfRange >= endx {
//p("arrayContainer.not(): exiting early with ac.clone()")
return ac.clone()
}
return ac.notClose(firstOfRange, endx-1) // remove everything in [firstOfRange,endx-1]
......@@ -120,18 +127,15 @@ func (ac *arrayContainer) not(firstOfRange, endx int) container {
// flip the values in the range [firstOfRange,lastOfRange]
func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container {
if firstOfRange > lastOfRange { // unlike add and remove, not uses an inclusive range [firstOfRange,lastOfRange]
//p("arrayContainer.notClose(): exiting early with ac.clone()")
return ac.clone()
}
// determine the span of array indices to be affected^M
startIndex := binarySearch(ac.content, uint16(firstOfRange))
//p("startIndex=%v", startIndex)
if startIndex < 0 {
startIndex = -startIndex - 1
}
lastIndex := binarySearch(ac.content, uint16(lastOfRange))
//p("lastIndex=%v", lastIndex)
if lastIndex < 0 {
lastIndex = -lastIndex - 2
}
......@@ -140,9 +144,7 @@ func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container {
newValuesInRange := spanToBeFlipped - currentValuesInRange
cardinalityChange := newValuesInRange - currentValuesInRange
newCardinality := len(ac.content) + cardinalityChange
//p("new card is %v", newCardinality)
if newCardinality > arrayDefaultMaxSize {
//p("new card over arrayDefaultMaxSize, so returning bitmap")
return ac.toBitmapContainer().not(firstOfRange, lastOfRange+1)
}
answer := newArrayContainer()
......@@ -329,7 +331,8 @@ func (ac *arrayContainer) ior(a container) container {
case *arrayContainer:
return ac.iorArray(x)
case *bitmapContainer:
return ac.iorBitmap(x)
return a.(*bitmapContainer).orArray(ac)
//return ac.iorBitmap(x) // note: this does not make sense
case *runContainer16:
if x.isFull() {
return x.clone()
......@@ -339,14 +342,44 @@ func (ac *arrayContainer) ior(a container) container {
panic("unsupported container type")
}
func (ac *arrayContainer) iorArray(ac2 *arrayContainer) container {
bc1 := ac.toBitmapContainer()
bc2 := ac2.toBitmapContainer()
bc1.iorBitmap(bc2)
*ac = *newArrayContainerFromBitmap(bc1)
func (ac *arrayContainer) iorArray(value2 *arrayContainer) container {
value1 := ac
len1 := value1.getCardinality()
len2 := value2.getCardinality()
maxPossibleCardinality := len1 + len2
if maxPossibleCardinality > arrayDefaultMaxSize { // it could be a bitmap!
bc := newBitmapContainer()
for k := 0; k < len(value2.content); k++ {
v := value2.content[k]
i := uint(v) >> 6
mask := uint64(1) << (v % 64)
bc.bitmap[i] |= mask
}
for k := 0; k < len(ac.content); k++ {
v := ac.content[k]
i := uint(v) >> 6
mask := uint64(1) << (v % 64)
bc.bitmap[i] |= mask
}
bc.cardinality = int(popcntSlice(bc.bitmap))
if bc.cardinality <= arrayDefaultMaxSize {
return bc.toArrayContainer()
}
return bc
}
if maxPossibleCardinality > cap(value1.content) {
newcontent := make([]uint16, 0, maxPossibleCardinality)
copy(newcontent[len2:maxPossibleCardinality], ac.content[0:len1])
ac.content = newcontent
} else {
copy(ac.content[len2:maxPossibleCardinality], ac.content[0:len1])
}
nl := union2by2(value1.content[len2:maxPossibleCardinality], value2.content, ac.content)
ac.content = ac.content[:nl] // reslice to match actual used capacity
return ac
}
// Note: such code does not make practical sense, except for lazy evaluations
func (ac *arrayContainer) iorBitmap(bc2 *bitmapContainer) container {
bc1 := ac.toBitmapContainer()
bc1.iorBitmap(bc2)
......@@ -468,7 +501,6 @@ func (ac *arrayContainer) lazyorArray(value2 *arrayContainer) container {
}
func (ac *arrayContainer) and(a container) container {
//p("ac.and() called")
switch x := a.(type) {
case *arrayContainer:
return ac.andArray(x)
......@@ -515,7 +547,7 @@ func (ac *arrayContainer) iand(a container) container {
return ac.iandBitmap(x)
case *runContainer16:
if x.isFull() {
return ac.clone()
return ac
}
return x.andArray(ac)
}
......@@ -687,7 +719,6 @@ func (ac *arrayContainer) inot(firstOfRange, endx int) container {
// flip the values in the range [firstOfRange,lastOfRange]
func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container {
//p("ac.inotClose() starting")
if firstOfRange > lastOfRange { // unlike add and remove, not uses an inclusive range [firstOfRange,lastOfRange]
return ac
}
......@@ -710,7 +741,6 @@ func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container {
if cardinalityChange > 0 {
if newCardinality > len(ac.content) {
if newCardinality > arrayDefaultMaxSize {
//p("ac.inotClose() converting to bitmap and doing inot there")
bcRet := ac.toBitmapContainer()
bcRet.inot(firstOfRange, lastOfRange+1)
*ac = *bcRet.toArrayContainer()
......@@ -731,7 +761,6 @@ func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container {
}
}
ac.content = ac.content[:newCardinality]
//p("bottom of ac.inotClose(): returning ac")
return ac
}
......@@ -768,19 +797,12 @@ func (ac *arrayContainer) negateRange(buffer []uint16, startIndex, lastIndex, st
}
}
func min(a, b int) int {
if a < b {
return a
}
return b
}
func (ac *arrayContainer) isFull() bool {
return false
}
func (ac *arrayContainer) andArray(value2 *arrayContainer) container {
desiredcapacity := min(ac.getCardinality(), value2.getCardinality())
desiredcapacity := minOfInt(ac.getCardinality(), value2.getCardinality())
answer := newArrayContainerCapacity(desiredcapacity)
length := intersection2by2(
ac.content,
......@@ -918,7 +940,7 @@ func (ac *arrayContainer) toEfficientContainer() container {
card := ac.getCardinality()
sizeAsArrayContainer := arrayContainerSizeInBytes(card)
if sizeAsRunContainer <= min(sizeAsBitmapContainer, sizeAsArrayContainer) {
if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
return newRunContainer16FromArray(ac)
}
if card <= arrayDefaultMaxSize {
......@@ -930,3 +952,17 @@ func (ac *arrayContainer) toEfficientContainer() container {
func (ac *arrayContainer) containerType() contype {
return arrayContype
}
func (ac *arrayContainer) addOffset(x uint16) []container {
low := &arrayContainer{}
high := &arrayContainer{}
for _, val := range ac.content {
y := uint32(val) + uint32(x)
if highbits(y) > 0 {
high.content = append(high.content, lowbits(y))
} else {
low.content = append(low.content, lowbits(y))
}
}
return []container{low, high}
}
......@@ -6,7 +6,7 @@ package roaring
import "github.com/tinylib/msgp/msgp"
// DecodeMsg implements msgp.Decodable
// Deprecated: DecodeMsg implements msgp.Decodable
func (z *arrayContainer) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
......@@ -49,7 +49,7 @@ func (z *arrayContainer) DecodeMsg(dc *msgp.Reader) (err error) {
return
}
// EncodeMsg implements msgp.Encodable
// Deprecated: EncodeMsg implements msgp.Encodable
func (z *arrayContainer) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 1
// write "content"
......@@ -70,7 +70,7 @@ func (z *arrayContainer) EncodeMsg(en *msgp.Writer) (err error) {
return
}
// MarshalMsg implements msgp.Marshaler
// Deprecated: MarshalMsg implements msgp.Marshaler
func (z *arrayContainer) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 1
......@@ -83,7 +83,7 @@ func (z *arrayContainer) MarshalMsg(b []byte) (o []byte, err error) {
return
}
// UnmarshalMsg implements msgp.Unmarshaler
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *arrayContainer) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte
_ = field
......@@ -127,7 +127,7 @@ func (z *arrayContainer) UnmarshalMsg(bts []byte) (o []byte, err error) {
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *arrayContainer) Msgsize() (s int) {
s = 1 + 8 + msgp.ArrayHeaderSize + (len(z.content) * (msgp.Uint16Size))
return
......
......@@ -6,7 +6,7 @@ package roaring
import "github.com/tinylib/msgp/msgp"
// DecodeMsg implements msgp.Decodable
// Deprecated: DecodeMsg implements msgp.Decodable
func (z *bitmapContainer) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
......@@ -54,7 +54,7 @@ func (z *bitmapContainer) DecodeMsg(dc *msgp.Reader) (err error) {
return
}
// EncodeMsg implements msgp.Encodable
// Deprecated: EncodeMsg implements msgp.Encodable
func (z *bitmapContainer) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 2
// write "cardinality"
......@@ -84,7 +84,7 @@ func (z *bitmapContainer) EncodeMsg(en *msgp.Writer) (err error) {
return
}
// MarshalMsg implements msgp.Marshaler
// Deprecated: MarshalMsg implements msgp.Marshaler
func (z *bitmapContainer) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 2
......@@ -100,7 +100,7 @@ func (z *bitmapContainer) MarshalMsg(b []byte) (o []byte, err error) {
return
}
// UnmarshalMsg implements msgp.Unmarshaler
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *bitmapContainer) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte
_ = field
......@@ -149,13 +149,13 @@ func (z *bitmapContainer) UnmarshalMsg(bts []byte) (o []byte, err error) {
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *bitmapContainer) Msgsize() (s int) {
s = 1 + 12 + msgp.IntSize + 7 + msgp.ArrayHeaderSize + (len(z.bitmap) * (msgp.Uint64Size))
return
}
// DecodeMsg implements msgp.Decodable
// Deprecated: DecodeMsg implements msgp.Decodable
func (z *bitmapContainerShortIterator) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
......@@ -239,7 +239,7 @@ func (z *bitmapContainerShortIterator) DecodeMsg(dc *msgp.Reader) (err error) {
return
}
// EncodeMsg implements msgp.Encodable
// Deprecated: EncodeMsg implements msgp.Encodable
func (z *bitmapContainerShortIterator) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 2
// write "ptr"
......@@ -291,7 +291,7 @@ func (z *bitmapContainerShortIterator) EncodeMsg(en *msgp.Writer) (err error) {
return
}
// MarshalMsg implements msgp.Marshaler