Commit 42be2b91 authored by ale's avatar ale

Update dependencies

parent c48112d1
Pipeline #1284 passed with stages
in 1 minute and 30 seconds
......@@ -7,4 +7,5 @@ Bob Potter (@bpot),
Tyson Maly (@tvmaly),
Will Glynn (@willglynn),
Brent Pedersen (@brentp)
Maciej Biłas (@maciej)
Maciej Biłas (@maciej),
Joe Nall (@joenall)
......@@ -7,5 +7,9 @@ Bob Potter (@bpot),
Tyson Maly (@tvmaly),
Will Glynn (@willglynn),
Brent Pedersen (@brentp),
Jason E. Aten (@glycerine)
Vali Malinoiu (@0x4139)
Jason E. Aten (@glycerine),
Vali Malinoiu (@0x4139),
Forud Ghafouri (@fzerorubigd),
Joe Nall (@joenall),
(@fredim),
Edd Robinson (@e-dard)
.PHONY: help all test format fmtcheck vet lint qa deps clean nuke rle backrle ser fetch-real-roaring-datasets
.PHONY: help all test format fmtcheck vet lint qa deps clean nuke ser fetch-real-roaring-datasets
......@@ -24,7 +24,9 @@ help:
@echo " make clean : Remove any build artifact"
@echo " make nuke : Deletes any intermediate file"
@echo ""
@echo " make fuzz : Fuzzy testing"
@echo " make fuzz-smat : Fuzzy testing with smat"
@echo " make fuzz-stream : Fuzzy testing with stream deserialization"
@echo " make fuzz-buffer : Fuzzy testing with buffer deserialization"
@echo ""
# Alias for help target
......@@ -71,9 +73,19 @@ deps:
GOPATH=$(GOPATH) go get github.com/philhofer/fwd
GOPATH=$(GOPATH) go get github.com/jtolds/gls
fuzz:
fuzz-smat:
go test -tags=gofuzz -run=TestGenerateSmatCorpus
go-fuzz-build github.com/RoaringBitmap/roaring
go-fuzz-build -func FuzzSmat github.com/RoaringBitmap/roaring
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
fuzz-stream:
go-fuzz-build -func FuzzSerializationStream github.com/RoaringBitmap/roaring
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
fuzz-buffer:
go-fuzz-build -func FuzzSerializationBuffer github.com/RoaringBitmap/roaring
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
# Remove any build artifact
......@@ -85,18 +97,8 @@ nuke:
rm -rf ./target
GOPATH=$(GOPATH) go clean -i ./...
rle:
cp rle.go rle16.go
perl -pi -e 's/32/16/g' rle16.go
cp rle_test.go rle16_test.go
perl -pi -e 's/32/16/g' rle16_test.go
backrle:
cp rle16.go rle.go
perl -pi -e 's/16/32/g' rle.go
perl -pi -e 's/2032/2016/g' rle.go
ser: rle
ser:
go generate
cover:
......
roaring [![Build Status](https://travis-ci.org/RoaringBitmap/roaring.png)](https://travis-ci.org/RoaringBitmap/roaring) [![Coverage Status](https://coveralls.io/repos/github/RoaringBitmap/roaring/badge.svg?branch=master)](https://coveralls.io/github/RoaringBitmap/roaring?branch=master) [![GoDoc](https://godoc.org/github.com/RoaringBitmap/roaring?status.svg)](https://godoc.org/github.com/RoaringBitmap/roaring) [![Go Report Card](https://goreportcard.com/badge/RoaringBitmap/roaring)](https://goreportcard.com/report/github.com/RoaringBitmap/roaring)
=============
This is a go port of the Roaring bitmap data structure.
This is a go version of the Roaring bitmap data structure.
Roaring bitmaps are used by several major systems such as [Apache Lucene][lucene] and derivative systems such as [Solr][solr] and
......@@ -29,6 +30,12 @@ Roaring bitmaps are found to work well in many important applications:
The ``roaring`` Go library is used by
* [Cloud Torrent](https://github.com/jpillora/cloud-torrent): a self-hosted remote torrent client
* [runv](https://github.com/hyperhq/runv): a hypervisor-based runtime for the Open Containers Initiative
* [InfluxDB](https://www.influxdata.com)
* [Pilosa](https://www.pilosa.com/)
* [Bleve](http://www.blevesearch.com)
This library is used in production in several systems; it is part of the [Awesome Go collection](https://awesome-go.com).
There are also [Java](https://github.com/RoaringBitmap/RoaringBitmap) and [C/C++](https://github.com/RoaringBitmap/CRoaring) versions. The Java, C, C++ and Go versions are binary compatible: e.g., you can save bitmaps
from a Java program and load them back in Go, and vice versa. We have a [format specification](https://github.com/RoaringBitmap/RoaringFormatSpec).
......@@ -36,17 +43,17 @@ from a Java program and load them back in Go, and vice versa. We have a [format
This code is licensed under Apache License, Version 2.0 (ASL2.0).
Copyright 2016 by the authors.
Copyright 2016-... by the authors.
### References
- Daniel Lemire, Owen Kaser, Nathan Kurz, Luca Deri, Chris O'Hara, François Saint-Jacques, Gregory Ssi-Yan-Kai, Roaring Bitmaps: Implementation of an Optimized Software Library [arXiv:1709.07821](https://arxiv.org/abs/1709.07821)
- Daniel Lemire, Owen Kaser, Nathan Kurz, Luca Deri, Chris O'Hara, François Saint-Jacques, Gregory Ssi-Yan-Kai, Roaring Bitmaps: Implementation of an Optimized Software Library, Software: Practice and Experience 48 (4), 2018 [arXiv:1709.07821](https://arxiv.org/abs/1709.07821)
- Samy Chambi, Daniel Lemire, Owen Kaser, Robert Godin,
Better bitmap performance with Roaring bitmaps,
Software: Practice and Experience Volume 46, Issue 5, pages 709–719, May 2016
Software: Practice and Experience 46 (5), 2016.
http://arxiv.org/abs/1402.6407 This paper used data from http://lemire.me/data/realroaring2014.html
- Daniel Lemire, Gregory Ssi-Yan-Kai, Owen Kaser, Consistently faster and smaller compressed bitmaps with Roaring, Software: Practice and Experience (accepted in 2016, to appear) http://arxiv.org/abs/1603.06549
- Daniel Lemire, Gregory Ssi-Yan-Kai, Owen Kaser, Consistently faster and smaller compressed bitmaps with Roaring, Software: Practice and Experience 46 (11), 2016. http://arxiv.org/abs/1603.06549
### Dependencies
......@@ -91,7 +98,7 @@ func main() {
rb2 := roaring.BitmapOf(3, 4, 1000)
fmt.Println(rb2.String())
rb3 := roaring.NewBitmap()
rb3 := roaring.New()
fmt.Println(rb3.String())
fmt.Println("Cardinality: ", rb1.GetCardinality())
......@@ -106,9 +113,9 @@ func main() {
rb3.Or(rb1)
// computes union of the three bitmaps in parallel using 4 workers
ParOr(4, rb1, rb2, rb3)
roaring.ParOr(4, rb1, rb2, rb3)
// computes intersection of the three bitmaps in parallel using 4 workers
ParAnd(4, rb1, rb2, rb3)
roaring.ParAnd(4, rb1, rb2, rb3)
// prints 1, 3, 4, 5, 1000
......@@ -121,11 +128,12 @@ func main() {
// next we include an example of serialization
buf := new(bytes.Buffer)
rb1.WriteTo(buf) // we omit error handling
newrb:= roaring.NewBitmap()
newrb:= roaring.New()
newrb.ReadFrom(buf)
if rb1.Equals(newrb) {
fmt.Println("I wrote the content to a byte stream and read it back.")
}
// you can iterate over bitmaps using Iterator(), ReverseIterator() or ManyIterator()
}
```
......@@ -139,7 +147,7 @@ consider the following sample of code:
if err != nil {
t.Errorf("Failed writing")
}
newrb:= NewBitmap()
newrb:= New()
size,err=newrb.ReadFrom(buf)
if err != nil {
t.Errorf("Failed reading")
......@@ -184,6 +192,14 @@ https://coveralls.io/github/RoaringBitmap/roaring?branch=master
Type
go test -bench Benchmark -run -
To run benchmarks on [Real Roaring Datasets](https://github.com/RoaringBitmap/real-roaring-datasets)
run the following:
```sh
go get github.com/RoaringBitmap/real-roaring-datasets
BENCH_REAL_DATA=1 go test -bench BenchmarkRealData -run -
```
### Iterative use
......
......@@ -28,6 +28,14 @@ func (ac *arrayContainer) getShortIterator() shortIterable {
return &shortIterator{ac.content, 0}
}
// getReverseIterator returns a reverseIterator over ac.content whose starting
// index is the last position of the slice (-1 when the slice is empty).
func (ac *arrayContainer) getReverseIterator() shortIterable {
	lastIndex := len(ac.content) - 1
	return &reverseIterator{ac.content, lastIndex}
}
// getManyIterator returns a manyIterator over ac.content that starts at
// index 0.
func (ac *arrayContainer) getManyIterator() manyIterable {
	it := &manyIterator{ac.content, 0}
	return it
}
// minimum returns the first element of ac.content. The container is assumed
// to be non-empty; indexing an empty content slice panics.
func (ac *arrayContainer) minimum() uint16 {
	first := ac.content[0] // assume not empty
	return first
}
......@@ -111,7 +119,6 @@ func (ac *arrayContainer) iremoveRange(firstOfRange, endx int) container {
// flip the values in the range [firstOfRange,endx)
func (ac *arrayContainer) not(firstOfRange, endx int) container {
if firstOfRange >= endx {
//p("arrayContainer.not(): exiting early with ac.clone()")
return ac.clone()
}
return ac.notClose(firstOfRange, endx-1) // remove everything in [firstOfRange,endx-1]
......@@ -120,18 +127,15 @@ func (ac *arrayContainer) not(firstOfRange, endx int) container {
// flip the values in the range [firstOfRange,lastOfRange]
func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container {
if firstOfRange > lastOfRange { // unlike add and remove, not uses an inclusive range [firstOfRange,lastOfRange]
//p("arrayContainer.notClose(): exiting early with ac.clone()")
return ac.clone()
}
// determine the span of array indices to be affected
startIndex := binarySearch(ac.content, uint16(firstOfRange))
//p("startIndex=%v", startIndex)
if startIndex < 0 {
startIndex = -startIndex - 1
}
lastIndex := binarySearch(ac.content, uint16(lastOfRange))
//p("lastIndex=%v", lastIndex)
if lastIndex < 0 {
lastIndex = -lastIndex - 2
}
......@@ -140,9 +144,7 @@ func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container {
newValuesInRange := spanToBeFlipped - currentValuesInRange
cardinalityChange := newValuesInRange - currentValuesInRange
newCardinality := len(ac.content) + cardinalityChange
//p("new card is %v", newCardinality)
if newCardinality > arrayDefaultMaxSize {
//p("new card over arrayDefaultMaxSize, so returning bitmap")
return ac.toBitmapContainer().not(firstOfRange, lastOfRange+1)
}
answer := newArrayContainer()
......@@ -329,7 +331,8 @@ func (ac *arrayContainer) ior(a container) container {
case *arrayContainer:
return ac.iorArray(x)
case *bitmapContainer:
return ac.iorBitmap(x)
return a.(*bitmapContainer).orArray(ac)
//return ac.iorBitmap(x) // note: this does not make sense
case *runContainer16:
if x.isFull() {
return x.clone()
......@@ -339,14 +342,44 @@ func (ac *arrayContainer) ior(a container) container {
panic("unsupported container type")
}
func (ac *arrayContainer) iorArray(ac2 *arrayContainer) container {
bc1 := ac.toBitmapContainer()
bc2 := ac2.toBitmapContainer()
bc1.iorBitmap(bc2)
*ac = *newArrayContainerFromBitmap(bc1)
// iorArray computes the union of ac and value2.
//
// When the combined element count exceeds arrayDefaultMaxSize the union is
// built in a freshly allocated bitmapContainer and ac itself is NOT modified;
// that bitmap (or its array conversion, if the final cardinality turns out to
// be <= arrayDefaultMaxSize) is returned. Otherwise the union is written into
// ac.content and ac is returned. Callers must therefore always use the
// returned container rather than assume ac was updated.
func (ac *arrayContainer) iorArray(value2 *arrayContainer) container {
value1 := ac
// NOTE(review): len1/len2 are used below as slice bounds on content, so
// getCardinality() is presumably len(content) — confirm.
len1 := value1.getCardinality()
len2 := value2.getCardinality()
// Upper bound on the result size (reached when the inputs share no value).
maxPossibleCardinality := len1 + len2
if maxPossibleCardinality > arrayDefaultMaxSize { // it could be a bitmap!
bc := newBitmapContainer()
// Set one bit per value of value2: word index is v/64, bit index is v%64.
for k := 0; k < len(value2.content); k++ {
v := value2.content[k]
i := uint(v) >> 6
mask := uint64(1) << (v % 64)
bc.bitmap[i] |= mask
}
// Same for the values of ac; bits already set by value2 stay set.
for k := 0; k < len(ac.content); k++ {
v := ac.content[k]
i := uint(v) >> 6
mask := uint64(1) << (v % 64)
bc.bitmap[i] |= mask
}
// The cardinality is derived from the bitmap itself (popcntSlice is
// presumably a population count over all words), not from len1+len2.
bc.cardinality = int(popcntSlice(bc.bitmap))
if bc.cardinality <= arrayDefaultMaxSize {
return bc.toArrayContainer()
}
return bc
}
if maxPossibleCardinality > cap(value1.content) {
// Not enough room: allocate a new backing array and place ac's current
// values in its tail region [len2, len1+len2).
newcontent := make([]uint16, 0, maxPossibleCardinality)
copy(newcontent[len2:maxPossibleCardinality], ac.content[0:len1])
ac.content = newcontent
} else {
// Enough capacity: shift ac's values to the tail region in place (the
// built-in copy permits overlapping source and destination).
copy(ac.content[len2:maxPossibleCardinality], ac.content[0:len1])
}
// Merge the relocated copy of ac's values with value2, writing the result
// from the front of ac.content.
// NOTE(review): this relies on union2by2 accepting an output buffer that
// shares its backing array with the first input and that may have length 0
// (reallocation branch) — confirm against union2by2.
nl := union2by2(value1.content[len2:maxPossibleCardinality], value2.content, ac.content)
ac.content = ac.content[:nl] // reslice to match actual used capacity
return ac
}
// Note: such code does not make practical sense, except for lazy evaluations
func (ac *arrayContainer) iorBitmap(bc2 *bitmapContainer) container {
bc1 := ac.toBitmapContainer()
bc1.iorBitmap(bc2)
......@@ -468,7 +501,6 @@ func (ac *arrayContainer) lazyorArray(value2 *arrayContainer) container {
}
func (ac *arrayContainer) and(a container) container {
//p("ac.and() called")
switch x := a.(type) {
case *arrayContainer:
return ac.andArray(x)
......@@ -515,7 +547,7 @@ func (ac *arrayContainer) iand(a container) container {
return ac.iandBitmap(x)
case *runContainer16:
if x.isFull() {
return ac.clone()
return ac
}
return x.andArray(ac)
}
......@@ -687,7 +719,6 @@ func (ac *arrayContainer) inot(firstOfRange, endx int) container {
// flip the values in the range [firstOfRange,lastOfRange]
func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container {
//p("ac.inotClose() starting")
if firstOfRange > lastOfRange { // unlike add and remove, not uses an inclusive range [firstOfRange,lastOfRange]
return ac
}
......@@ -710,7 +741,6 @@ func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container {
if cardinalityChange > 0 {
if newCardinality > len(ac.content) {
if newCardinality > arrayDefaultMaxSize {
//p("ac.inotClose() converting to bitmap and doing inot there")
bcRet := ac.toBitmapContainer()
bcRet.inot(firstOfRange, lastOfRange+1)
*ac = *bcRet.toArrayContainer()
......@@ -731,7 +761,6 @@ func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container {
}
}
ac.content = ac.content[:newCardinality]
//p("bottom of ac.inotClose(): returning ac")
return ac
}
......@@ -768,19 +797,12 @@ func (ac *arrayContainer) negateRange(buffer []uint16, startIndex, lastIndex, st
}
}
// min returns the smaller of the two ints a and b.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
// isFull always reports false for an arrayContainer.
func (ac *arrayContainer) isFull() bool {
return false
}
func (ac *arrayContainer) andArray(value2 *arrayContainer) container {
desiredcapacity := min(ac.getCardinality(), value2.getCardinality())
desiredcapacity := minOfInt(ac.getCardinality(), value2.getCardinality())
answer := newArrayContainerCapacity(desiredcapacity)
length := intersection2by2(
ac.content,
......@@ -918,7 +940,7 @@ func (ac *arrayContainer) toEfficientContainer() container {
card := ac.getCardinality()
sizeAsArrayContainer := arrayContainerSizeInBytes(card)
if sizeAsRunContainer <= min(sizeAsBitmapContainer, sizeAsArrayContainer) {
if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
return newRunContainer16FromArray(ac)
}
if card <= arrayDefaultMaxSize {
......@@ -930,3 +952,17 @@ func (ac *arrayContainer) toEfficientContainer() container {
// containerType returns the contype tag for array containers, arrayContype.
func (ac *arrayContainer) containerType() contype {
return arrayContype
}
// addOffset adds x to every value in ac.content (using 32-bit arithmetic so
// the sum cannot wrap) and distributes the results over two new array
// containers. A sum for which highbits reports a non-zero value is appended
// to the second container, any other sum to the first; in both cases the
// stored value is lowbits(sum). The result is always the two-element slice
// {first, second}; ac itself is not modified.
func (ac *arrayContainer) addOffset(x uint16) []container {
	lower, upper := &arrayContainer{}, &arrayContainer{}
	offset := uint32(x)
	for i := range ac.content {
		sum := uint32(ac.content[i]) + offset
		target := lower
		if highbits(sum) > 0 {
			target = upper
		}
		target.content = append(target.content, lowbits(sum))
	}
	return []container{lower, upper}
}
......@@ -6,7 +6,7 @@ package roaring
import "github.com/tinylib/msgp/msgp"
// DecodeMsg implements msgp.Decodable
// Deprecated: DecodeMsg implements msgp.Decodable
func (z *arrayContainer) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
......@@ -49,7 +49,7 @@ func (z *arrayContainer) DecodeMsg(dc *msgp.Reader) (err error) {
return
}
// EncodeMsg implements msgp.Encodable
// Deprecated: EncodeMsg implements msgp.Encodable
func (z *arrayContainer) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 1
// write "content"
......@@ -70,7 +70,7 @@ func (z *arrayContainer) EncodeMsg(en *msgp.Writer) (err error) {
return
}
// MarshalMsg implements msgp.Marshaler
// Deprecated: MarshalMsg implements msgp.Marshaler
func (z *arrayContainer) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 1
......@@ -83,7 +83,7 @@ func (z *arrayContainer) MarshalMsg(b []byte) (o []byte, err error) {
return
}
// UnmarshalMsg implements msgp.Unmarshaler
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
func (z *arrayContainer) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte
_ = field
......@@ -127,7 +127,7 @@ func (z *arrayContainer) UnmarshalMsg(bts []byte) (o []byte, err error) {
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *arrayContainer) Msgsize() (s int) {
s = 1 + 8 + msgp.ArrayHeaderSize + (len(z.content) * (msgp.Uint16Size))
return
......
......@@ -109,20 +109,93 @@ func (bcsi *bitmapContainerShortIterator) next() uint16 {
// hasNext reports whether the iterator still has a value to return, i.e.
// whether its current position i is non-negative.
func (bcsi *bitmapContainerShortIterator) hasNext() bool {
	exhausted := bcsi.i < 0
	return !exhausted
}
// newBitmapContainerShortIterator builds an iterator over a whose starting
// position is the result of a.NextSetBit(0).
func newBitmapContainerShortIterator(a *bitmapContainer) *bitmapContainerShortIterator {
	first := a.NextSetBit(0)
	return &bitmapContainerShortIterator{a, first}
}
// getShortIterator returns a new bitmapContainerShortIterator over bc.
func (bc *bitmapContainer) getShortIterator() shortIterable {
	it := newBitmapContainerShortIterator(bc)
	return it
}
// reverseBitmapContainerShortIterator iterates over a bitmapContainer by
// repeatedly asking it for the previous set bit.
type reverseBitmapContainerShortIterator struct {
// ptr is the container being iterated.
ptr *bitmapContainer
// i is the value the next call to next() will return; -1 means exhausted.
i int
}
// next returns the value at the current position and moves the iterator to
// the result of PrevSetBit on the position just below it. It panics when the
// iterator is already exhausted (position -1).
func (bcsi *reverseBitmapContainerShortIterator) next() uint16 {
	current := bcsi.i
	if current == -1 {
		panic("reverseBitmapContainerShortIterator.next() going beyond what is available")
	}
	bcsi.i = bcsi.ptr.PrevSetBit(current - 1)
	return uint16(current)
}
// hasNext reports whether the iterator still has a value to return, i.e.
// whether its current position i is non-negative.
func (bcsi *reverseBitmapContainerShortIterator) hasNext() bool {
	exhausted := bcsi.i < 0
	return !exhausted
}
// newReverseBitmapContainerShortIterator builds a reverse iterator over a.
// The starting position is a.maximum(), or -1 (already exhausted) when the
// container's cardinality is zero; maximum() is not called in that case.
func newReverseBitmapContainerShortIterator(a *bitmapContainer) *reverseBitmapContainerShortIterator {
	start := -1
	if a.cardinality != 0 {
		start = int(a.maximum())
	}
	return &reverseBitmapContainerShortIterator{a, start}
}
// getReverseIterator returns a new reverseBitmapContainerShortIterator over bc.
func (bc *bitmapContainer) getReverseIterator() shortIterable {
	it := newReverseBitmapContainerShortIterator(bc)
	return it
}
// bitmapContainerManyIterator emits the set bits of a bitmapContainer in
// batches, one 64-bit word of the bitmap at a time.
type bitmapContainerManyIterator struct {
// ptr is the container being iterated.
ptr *bitmapContainer
// base is the index into ptr.bitmap of the word currently being consumed;
// the constructor starts it at -1, before the first word is loaded.
base int
// bitset holds the bits of the current word that have not been emitted yet.
bitset uint64
}
// nextMany fills buf with the next values of the iteration and returns how
// many entries were written. Each entry is the bit position (word index * 64
// plus the offset of the lowest remaining bit in the word) OR-ed with hs.
// A return value smaller than len(buf) means the bitmap was exhausted. The
// iterator state (base, bitset) is saved before returning so a later call
// resumes where this one stopped.
func (bcmi *bitmapContainerManyIterator) nextMany(hs uint32, buf []uint32) int {
	word, idx := bcmi.bitset, bcmi.base
	filled := 0
	for filled < len(buf) {
		if word != 0 {
			// Isolate the lowest set bit; the number of one-bits in
			// (lowest-1) is that bit's offset inside the word.
			lowest := word & -word
			buf[filled] = uint32((idx*64)+int(popcount(lowest-1))) | hs
			filled++
			word ^= lowest // clear the bit just emitted
			continue
		}
		// Current word is used up: advance to the next one, if any.
		idx++
		if idx >= len(bcmi.ptr.bitmap) {
			break
		}
		word = bcmi.ptr.bitmap[idx]
	}
	bcmi.base = idx
	bcmi.bitset = word
	return filled
}
// newBitmapContainerManyIterator builds a many-iterator over a. It starts
// with base -1 and an empty bitset, so the first nextMany call loads word 0.
func newBitmapContainerManyIterator(a *bitmapContainer) *bitmapContainerManyIterator {
	it := &bitmapContainerManyIterator{a, -1, 0}
	return it
}
// getManyIterator returns a new bitmapContainerManyIterator over bc.
func (bc *bitmapContainer) getManyIterator() manyIterable {
	it := newBitmapContainerManyIterator(bc)
	return it
}
// getSizeInBytes returns the size of the bitmap words in bytes: 8 bytes for
// each 64-bit word in bc.bitmap. The struct overhead is not included.
func (bc *bitmapContainer) getSizeInBytes() int {
	const bytesPerWord = 8
	return len(bc.bitmap) * bytesPerWord // + bcBaseBytes
}
// serializedSizeInBytes returns the number of bytes the bitmap words occupy
// when serialized: 8 bytes for each 64-bit word in bc.bitmap.
//
// It must not return bc.Msgsize(): per the original author's note, doing so
// breaks GetSerializedSizeInBytes. The stale `return bc.Msgsize()` statement
// that shadowed the correct computation has been removed.
func (bc *bitmapContainer) serializedSizeInBytes() int {
	return len(bc.bitmap) * 8
}
const bcBaseBytes = int(unsafe.Sizeof(bitmapContainer{}))
......@@ -134,16 +207,13 @@ func bitmapContainerSizeInBytes() int {
// bitmapEquals reports whether a and b have the same length and hold the
// same word at every index. Two empty (or nil) slices compare equal.
func bitmapEquals(a, b []uint64) bool {
	n := len(a)
	if n != len(b) {
		return false
	}
	for k := 0; k < n; k++ {
		if a[k] != b[k] {
			return false
		}
	}
	return true
}
......@@ -166,9 +236,7 @@ func (bc *bitmapContainer) fillLeastSignificant16bits(x []uint32, i int, mask ui
func (bc *bitmapContainer) equals(o container) bool {
srb, ok := o.(*bitmapContainer)
if ok {
//p("bitmapContainers.equals: both are bitmapContainers")
if srb.cardinality != bc.cardinality {
//p("bitmapContainers.equals: card differs: %v vs %v", srb.cardinality, bc.cardinality)
return false
}
return bitmapEquals(bc.bitmap, srb.bitmap)
......@@ -218,12 +286,6 @@ func (bc *bitmapContainer) iremoveReturnMinimized(i uint16) container {
// iremove returns true if i was found.
func (bc *bitmapContainer) iremove(i uint16) bool {
/* branchless code
w := bc.bitmap[i>>6]
mask := uint64(1) << (i % 64)
neww := w &^ mask
bc.cardinality -= int((w ^ neww) >> (i % 64))
bc.bitmap[i>>6] = neww */
if bc.contains(i) {
bc.cardinality--
bc.bitmap[i/64] &^= (uint64(1) << (i % 64))
......@@ -263,14 +325,10 @@ func (bc *bitmapContainer) iremoveRange(firstOfRange, lastOfRange int) container
// flip all values in range [firstOfRange,endx)
func (bc *bitmapContainer) inot(firstOfRange, endx int) container {
p("bc.inot() called with [%v, %v)", firstOfRange, endx)
if endx-firstOfRange == maxCapacity {
//p("endx-firstOfRange == maxCapacity")
flipBitmapRange(bc.bitmap, firstOfRange, endx)
bc.cardinality = maxCapacity - bc.cardinality
//p("bc.cardinality is now %v", bc.cardinality)
} else if endx-firstOfRange > maxCapacity/2 {
//p("endx-firstOfRange > maxCapacity/2")
flipBitmapRange(bc.bitmap, firstOfRange, endx)
bc.computeCardinality()
} else {
......@@ -347,13 +405,28 @@ func (bc *bitmapContainer) lazyIOR(a container) container {
if x.isFull() {
return x.clone()
}
// TODO : implement efficient in-place lazy OR to bitmap
for i := range x.iv {
setBitmapRange(bc.