diff --git a/CHANGELOG.md b/CHANGELOG.md index 0415285f8..24fc67522 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,36 @@ ## Changelog ##### master +* bump google.golang.org/grpc to fix vulnerability GHSA-m425-mq94-257g by @KacperLegowski in https://github.com/go-graphite/go-carbon/pull/574 +* Actions bump and fpm fix by @deniszh in https://github.com/go-graphite/go-carbon/pull/580 +* Add dependabot config by @RincewindsHat in https://github.com/go-graphite/go-carbon/pull/583 +* Remove old otel dependency, upgrade deps by @deniszh in https://github.com/go-graphite/go-carbon/pull/586 +* Adding HTTP GET handler for health check by @deniszh in https://github.com/go-graphite/go-carbon/pull/588 +* Using cuckoo filter for new metric detection instead of cache by @deniszh in https://github.com/go-graphite/go-carbon/pull/590 +* Let's not delete values from boom filter by @deniszh in https://github.com/go-graphite/go-carbon/pull/593 +* fix: panic on slice bounds out of range when preparing data stream by @dowster in https://github.com/go-graphite/go-carbon/pull/599 +* Speed up fetchData by @deniszh in https://github.com/go-graphite/go-carbon/pull/601 +* Make throughput quota config per minute by @emadolsky in https://github.com/go-graphite/go-carbon/pull/612 + +###### dependabot updates +* Bump github/codeql-action from 2 to 3 by @dependabot in https://github.com/go-graphite/go-carbon/pull/584 +* Bump golangci/golangci-lint-action from 4 to 5 by @dependabot in https://github.com/go-graphite/go-carbon/pull/585 +* Bump golangci/golangci-lint-action from 5 to 6 by @dependabot in https://github.com/go-graphite/go-carbon/pull/587 +* Bump docker/build-push-action from 5 to 6 by @dependabot in https://github.com/go-graphite/go-carbon/pull/598 +* Bump google.golang.org/grpc from 1.64.0 to 1.64.1 by @dependabot in https://github.com/go-graphite/go-carbon/pull/600 +* Bump google.golang.org/grpc from 1.64.1 to 1.65.0 by @dependabot in https://github.com/go-graphite/go-carbon/pull/602 +* Bump github.com/BurntSushi/toml from 1.3.2 to 1.4.0 by @dependabot in https://github.com/go-graphite/go-carbon/pull/603 +* Bump github.com/klauspost/compress from 1.17.8 to 1.17.9 by @dependabot in https://github.com/go-graphite/go-carbon/pull/604 +* Bump google.golang.org/protobuf from 1.34.1 to 1.34.2 by @dependabot in https://github.com/go-graphite/go-carbon/pull/605 +* Bump cloud.google.com/go/pubsub from 1.38.0 to 1.40.0 by @dependabot in https://github.com/go-graphite/go-carbon/pull/606 +* Bump golang.org/x/net from 0.26.0 to 0.27.0 by @dependabot in https://github.com/go-graphite/go-carbon/pull/607 +* Bump google.golang.org/api from 0.181.0 to 0.188.0 by @dependabot in https://github.com/go-graphite/go-carbon/pull/608 +* Bump google.golang.org/api from 0.188.0 to 0.190.0 by @dependabot in https://github.com/go-graphite/go-carbon/pull/610 +* Bump cloud.google.com/go/pubsub from 1.40.0 to 1.41.0 by @dependabot in https://github.com/go-graphite/go-carbon/pull/611 +* Bump google.golang.org/api from 0.190.0 to 0.191.0 by @dependabot in https://github.com/go-graphite/go-carbon/pull/613 +* Bump golang.org/x/net from 0.27.0 to 0.28.0 by @dependabot in https://github.com/go-graphite/go-carbon/pull/614 +* Bump github.com/prometheus/client_golang from 1.19.1 to 1.20.0 by @dependabot in https://github.com/go-graphite/go-carbon/pull/616 +* Bump google.golang.org/api from 0.191.0 to 0.192.0 by @dependabot in https://github.com/go-graphite/go-carbon/pull/617 +* Bump github.com/IBM/sarama from 1.43.2 to 1.43.3 by @dependabot in 
https://github.com/go-graphite/go-carbon/pull/618 ##### version 0.17.3 * Bump golang.org/x/net from 0.7.0 to 0.17.0 by @dependabot in #568 diff --git a/README.md b/README.md index e4fbe0bea..61e7605da 100644 --- a/README.md +++ b/README.md @@ -169,7 +169,8 @@ max-size = 1000000 # "noop" - pick metrics to write in unspecified order, # requires least CPU and improves cache responsiveness write-strategy = "max" -# If > 0 use bloom filter to detect new metrics instead of cache +# If > 0 use bloom filter to detect new metrics instead of cache (EXPERIMENTAL) +# works better for multi-million metrics installations bloom-size = 0 [udp] diff --git a/cache/cache.go b/cache/cache.go index a2e4a7c57..179b3e2f4 100644 --- a/cache/cache.go +++ b/cache/cache.go @@ -11,11 +11,12 @@ import ( "sync/atomic" "time" - cuckoo "github.com/seiflotfy/cuckoofilter" + "github.com/cespare/xxhash/v2" "github.com/go-graphite/go-carbon/helper" "github.com/go-graphite/go-carbon/points" "github.com/go-graphite/go-carbon/tags" + "github.com/greatroar/blobloom" ) type WriteStrategy int @@ -60,8 +61,9 @@ type Cache struct { droppedRealtimeIndex uint32 // new metrics failed to be indexed in realtime } - newMetricsChan chan string - newMetricCf *cuckoo.Filter + newMetricsChan chan string + newMetricCf *blobloom.Filter + newMetricCfCapacity uint64 throttle func(ps *points.Points, inCache bool) bool } @@ -135,9 +137,13 @@ func (c *Cache) SetMaxSize(maxSize uint32) { } // SetBloomSize of bloom filter -func (c *Cache) SetBloomSize(bloomSize uint) { +func (c *Cache) SetBloomSize(bloomSize uint64) { if bloomSize > 0 { - c.newMetricCf = cuckoo.NewFilter(bloomSize) + c.newMetricCf = blobloom.NewOptimized(blobloom.Config{ + Capacity: bloomSize, // Expected number of keys. + FPRate: 1e-4, // Accept one false positive per 10,000 lookups. 
+ }) + c.newMetricCfCapacity = bloomSize } } @@ -162,7 +168,12 @@ func (c *Cache) Stat(send helper.StatCallback) { send("notConfirmed", float64(c.NotConfirmedLength())) // report elements in bloom filter if c.newMetricCf != nil { - send("cfCount", float64(c.newMetricCf.Count())) + cfCount := c.newMetricCf.Cardinality() + if uint64(cfCount) > c.newMetricCfCapacity { + // full filter report +Inf cardinality + cfCount = float64(c.newMetricCfCapacity) + } + send("cfCount", cfCount) } helper.SendAndSubstractUint32("queries", &c.stat.queryCnt, send) @@ -336,10 +347,11 @@ func (c *Cache) Add(p *points.Points) { if c.newMetricsChan != nil && c.newMetricCf != nil { // add metric to new metric channel if missed in bloom // despite what we have it in cache (new behaviour) - if !c.newMetricCf.Lookup([]byte(p.Metric)) { + if !c.newMetricCf.Has(xxhash.Sum64([]byte(p.Metric))) { sendMetricToNewMetricChan(c, p.Metric) + c.newMetricCf.Add(xxhash.Sum64([]byte(p.Metric))) } - c.newMetricCf.Insert([]byte(p.Metric)) + } atomic.AddInt32(&c.stat.size, int32(count)) } diff --git a/cache/cache_test.go b/cache/cache_test.go index a7b413c81..f16d1953d 100644 --- a/cache/cache_test.go +++ b/cache/cache_test.go @@ -25,7 +25,7 @@ func TestCache(t *testing.T) { } // check if new metric added to bloom filter - if c.newMetricCf.Count() != 1 { + if c.newMetricCf.Empty() { t.FailNow() } diff --git a/carbon/config.go b/carbon/config.go index 22b4cb47d..bd5836241 100644 --- a/carbon/config.go +++ b/carbon/config.go @@ -81,7 +81,7 @@ type whisperConfig struct { type cacheConfig struct { MaxSize uint32 `toml:"max-size"` WriteStrategy string `toml:"write-strategy"` - BloomSize uint `toml:"bloom-size"` + BloomSize uint64 `toml:"bloom-size"` } type carbonlinkConfig struct { diff --git a/go-carbon.conf.example b/go-carbon.conf.example index 3985e595c..1da243434 100644 --- a/go-carbon.conf.example +++ b/go-carbon.conf.example @@ -67,6 +67,9 @@ max-size = 1000000 # "noop" - pick metrics to write in unspecified order, # requires least CPU and improves cache responsiveness write-strategy = "max" +# If > 0 use bloom filter to detect new metrics instead of cache (EXPERIMENTAL) +# works better for multi-million metrics installations +bloom-size = 0 [udp] listen = ":2003" diff --git a/go.mod b/go.mod index 7be9ce71b..7f2b71ff6 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/go-graphite/go-carbon -go 1.20 +go 1.21 require ( cloud.google.com/go/pubsub v1.41.0 @@ -34,7 +34,8 @@ require ( ) require ( - github.com/seiflotfy/cuckoofilter v0.0.0-20220411075957-e3b120b3f5fb + github.com/cespare/xxhash/v2 v2.3.0 + github.com/greatroar/blobloom v0.8.0 golang.org/x/net v0.28.0 google.golang.org/protobuf v1.34.2 ) @@ -46,9 +47,7 @@ require ( cloud.google.com/go/compute/metadata v0.5.0 // indirect cloud.google.com/go/iam v1.1.12 // indirect github.com/beorn7/perks v1.0.1 // indirect - github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect - github.com/dgryski/go-metro v0.0.0-20211217172704-adc40b04c140 // indirect github.com/eapache/go-resiliency v1.7.0 // indirect github.com/eapache/go-xerial-snappy v0.0.0-20230731223053-c322873962e3 // indirect github.com/eapache/queue v1.1.0 // indirect diff --git a/go.sum b/go.sum index 33a677f7d..276f20e24 100644 --- a/go.sum +++ b/go.sum @@ -11,7 +11,9 @@ cloud.google.com/go/compute/metadata v0.5.0/go.mod h1:aHnloV2TPI38yx4s9+wAZhHykW cloud.google.com/go/iam v1.1.12 h1:JixGLimRrNGcxvJEQ8+clfLxPlbeZA6MuRJ+qJNQ5Xw= cloud.google.com/go/iam v1.1.12/go.mod 
h1:9LDX8J7dN5YRyzVHxwQzrQs9opFFqn0Mxs9nAeB+Hhg= cloud.google.com/go/kms v1.18.4 h1:dYN3OCsQ6wJLLtOnI8DGUwQ5shMusXsWCCC+s09ATsk= +cloud.google.com/go/kms v1.18.4/go.mod h1:SG1bgQ3UWW6/KdPo9uuJnzELXY5YTTMJtDYvajiQ22g= cloud.google.com/go/longrunning v0.5.11 h1:Havn1kGjz3whCfoD8dxMLP73Ph5w+ODyZB9RUsDxtGk= +cloud.google.com/go/longrunning v0.5.11/go.mod h1:rDn7//lmlfWV1Dx6IB4RatCPenTwwmqXuiP0/RgoEO4= cloud.google.com/go/pubsub v1.41.0 h1:ZPaM/CvTO6T+1tQOs/jJ4OEMpjtel0PTLV7j1JK+ZrI= cloud.google.com/go/pubsub v1.41.0/go.mod h1:g+YzC6w/3N91tzG66e2BZtp7WrpBBMXVa3Y9zVoOGpk= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= @@ -45,9 +47,6 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgryski/go-expirecache v0.0.0-20170314133854-743ef98b2adb h1:X9MwMz6mVZEWcbhsri5TwaCm/Q4USFdAAmy1T7RCGjw= github.com/dgryski/go-expirecache v0.0.0-20170314133854-743ef98b2adb/go.mod h1:pD/+9DfmmQ+xvOI1fxUltHV69BxC1aeTILPQg9Kw1hE= -github.com/dgryski/go-metro v0.0.0-20200812162917-85c65e2d0165/go.mod h1:c9O8+fpSOX1DM8cPNSkX/qsBWdkD4yd2dpciOWQjpBw= -github.com/dgryski/go-metro v0.0.0-20211217172704-adc40b04c140 h1:y7y0Oa6UawqTFPCDw9JG6pdKt4F9pAhHv0B7FMGaGD0= -github.com/dgryski/go-metro v0.0.0-20211217172704-adc40b04c140/go.mod h1:c9O8+fpSOX1DM8cPNSkX/qsBWdkD4yd2dpciOWQjpBw= github.com/dgryski/go-trigram v0.0.0-20160407183937-79ec494e1ad0 h1:b+7JSiBM+hnLQjP/lXztks5hnLt1PS46hktG9VOJgzo= github.com/dgryski/go-trigram v0.0.0-20160407183937-79ec494e1ad0/go.mod h1:qzKC/DpcxK67zaSHdCmIv3L9WJViHVinYXN2S7l3RM8= github.com/dgryski/httputil v0.0.0-20160116060654-189c2918cd08 h1:BGzXzhmOgLHlylvQ27Tcgz235JvonPEgdMtpaZaeZt0= @@ -108,6 +107,7 @@ github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= @@ -135,6 +135,8 @@ github.com/googleapis/gax-go/v2 v2.13.0/go.mod h1:Z/fvTZXF8/uw7Xu5GuslPw+bplx6SS github.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+HVt/4epWDjd4= github.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM= github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/greatroar/blobloom v0.8.0 h1:I9RlEkfqK9/6f1v9mFmDYegDQ/x0mISCpiNpAm23Pt4= +github.com/greatroar/blobloom v0.8.0/go.mod h1:mjMJ1hh1wjGVfr93QIHJ6FfDNVrA0IELv8OvMHJxHKs= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= @@ -168,11 +170,13 @@ github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2 github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= 
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/lomik/graphite-pickle v0.0.0-20171221213606-614e8df42119 h1:9kRJjaYdyzqGcGMeWeVif1vkToJvqzPEe5Vqx4IDXBg= github.com/lomik/graphite-pickle v0.0.0-20171221213606-614e8df42119/go.mod h1:C0xsTshsU0n/LkhSbjZx2UkLuWSa3uFmq9D35Ch4rNE= github.com/lomik/og-rek v0.0.0-20170411191824-628eefeb8d80 h1:KVyDGUXjVOdHQt24wIgY4ZdGFXHtQHLWw0L/MAK3Kb0= @@ -214,8 +218,7 @@ github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475 h1:N/ElC8H3+5X github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= -github.com/seiflotfy/cuckoofilter v0.0.0-20220411075957-e3b120b3f5fb h1:XfLJSPIOUX+osiMraVgIrMR27uMXnRJWGm1+GL8/63U= -github.com/seiflotfy/cuckoofilter v0.0.0-20220411075957-e3b120b3f5fb/go.mod h1:bR6DqgcAl1zTcOX8/pE2Qkj9XO00eCNqmKb7lXP8EAg= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/sevlyar/go-daemon v0.1.6 h1:EUh1MDjEM4BI109Jign0EaknA2izkOyi0LV3ro3QQGs= github.com/sevlyar/go-daemon v0.1.6/go.mod h1:6dJpPatBT9eUwM5VCw9Bt6CdX9Tk6UWvhW3MebLDRKE= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -261,6 +264,7 @@ go.opentelemetry.io/otel/trace v1.24.0/go.mod h1:HPc3Xr/cOApsBI154IU0OI0HJexz+aw go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= @@ -417,11 +421,12 @@ gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.0-20200605160147-a5ece683394c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 
h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gotest.tools/v3 v3.5.1 h1:EENdUnS3pdur5nybKYIh2Vfgc8IUNBjxDPSjtiJcOzU= +gotest.tools/v3 v3.5.1/go.mod h1:isy3WKz7GK6uNw/sbHzfKBLvlvXwUyV06n6brMxxopU= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/vendor/github.com/dgryski/go-metro/LICENSE b/vendor/github.com/dgryski/go-metro/LICENSE deleted file mode 100644 index 6243b617c..000000000 --- a/vendor/github.com/dgryski/go-metro/LICENSE +++ /dev/null @@ -1,24 +0,0 @@ -This package is a mechanical translation of the reference C++ code for -MetroHash, available at https://github.com/jandrewrogers/MetroHash - -The MIT License (MIT) - -Copyright (c) 2016 Damian Gryski - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/vendor/github.com/dgryski/go-metro/README b/vendor/github.com/dgryski/go-metro/README deleted file mode 100644 index 5ecebb385..000000000 --- a/vendor/github.com/dgryski/go-metro/README +++ /dev/null @@ -1,6 +0,0 @@ -MetroHash - -This package is a mechanical translation of the reference C++ code for -MetroHash, available at https://github.com/jandrewrogers/MetroHash - -I claim no additional copyright over the original implementation. 
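A note on the cache.go hunks above: the per-metric cuckoo filter (whose go-metro dependency is deleted in the surrounding vendor/ hunks) is replaced by a blocked Bloom filter from github.com/greatroar/blobloom, keyed with 64-bit xxhash values. The sketch below condenses that pattern into a standalone program. The construction parameters mirror SetBloomSize and the lookup-then-insert order mirrors Cache.Add, but the wrapper functions (newMetricFilter, seenBefore) and the sample metric name are illustrative and not part of the patch.

```go
package main

import (
	"fmt"

	"github.com/cespare/xxhash/v2"
	"github.com/greatroar/blobloom"
)

// newMetricFilter builds a blocked Bloom filter sized for the expected
// number of distinct metric names, as SetBloomSize now does.
func newMetricFilter(expectedMetrics uint64) *blobloom.Filter {
	return blobloom.NewOptimized(blobloom.Config{
		Capacity: expectedMetrics, // expected number of keys
		FPRate:   1e-4,            // accept one false positive per 10,000 lookups
	})
}

// seenBefore reports whether a metric name has (probably) been seen already,
// recording it if not. This is the order used in Cache.Add: only names that
// miss in the filter are treated as new metrics, and Bloom filters have no
// false negatives, so a genuinely new metric is never silently skipped.
func seenBefore(f *blobloom.Filter, metric string) bool {
	h := xxhash.Sum64([]byte(metric))
	if f.Has(h) {
		return true // probably seen before (may be a false positive)
	}
	f.Add(h)
	return false
}

func main() {
	f := newMetricFilter(1_000_000)
	fmt.Println(seenBefore(f, "carbon.agents.example.metricsReceived")) // false: first sighting
	fmt.Println(seenBefore(f, "carbon.agents.example.metricsReceived")) // true: already recorded
}
```

Note that the blobloom API in the vendored code below has no per-key delete, which lines up with the earlier changelog entry about not deleting values from the filter; the cuckoo filter's Insert/Delete/Lookup surface is reduced to Add/Has here.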
diff --git a/vendor/github.com/dgryski/go-metro/metro.py b/vendor/github.com/dgryski/go-metro/metro.py deleted file mode 100644 index 8dd4d26e6..000000000 --- a/vendor/github.com/dgryski/go-metro/metro.py +++ /dev/null @@ -1,199 +0,0 @@ -import peachpy.x86_64 - -k0 = 0xD6D018F5 -k1 = 0xA2AA033B -k2 = 0x62992FC1 -k3 = 0x30BC5B29 - -def advance(p,l,c): - ADD(p,c) - SUB(l,c) - -def imul(r,k): - t = GeneralPurposeRegister64() - MOV(t, k) - IMUL(r, t) - -def update32(v, p,idx, k, vadd): - r = GeneralPurposeRegister64() - MOV(r, [p + idx]) - imul(r, k) - ADD(v, r) - ROR(v, 29) - ADD(v, vadd) - -def final32(v, regs, keys): - r = GeneralPurposeRegister64() - MOV(r, v[regs[1]]) - ADD(r, v[regs[2]]) - imul(r, keys[0]) - ADD(r, v[regs[3]]) - ROR(r, 37) - imul(r, keys[1]) - XOR(v[regs[0]], r) - -seed = Argument(uint64_t) -buffer_base = Argument(ptr()) -buffer_len = Argument(int64_t) -buffer_cap = Argument(int64_t) - -def makeHash(name, args): - with Function(name, args, uint64_t) as function: - - reg_ptr = GeneralPurposeRegister64() - reg_ptr_len = GeneralPurposeRegister64() - reg_hash = GeneralPurposeRegister64() - - LOAD.ARGUMENT(reg_hash, seed) - LOAD.ARGUMENT(reg_ptr, buffer_base) - LOAD.ARGUMENT(reg_ptr_len, buffer_len) - - imul(reg_hash, k0) - r = GeneralPurposeRegister64() - MOV(r, k2*k0) - ADD(reg_hash, r) - - after32 = Label("after32") - - CMP(reg_ptr_len, 32) - JL(after32) - v = [GeneralPurposeRegister64() for _ in range(4)] - for i in range(4): - MOV(v[i], reg_hash) - - with Loop() as loop: - update32(v[0], reg_ptr, 0, k0, v[2]) - update32(v[1], reg_ptr, 8, k1, v[3]) - update32(v[2], reg_ptr, 16, k2, v[0]) - update32(v[3], reg_ptr, 24, k3, v[1]) - - ADD(reg_ptr, 32) - SUB(reg_ptr_len, 32) - CMP(reg_ptr_len, 32) - JGE(loop.begin) - - final32(v, [2,0,3,1], [k0, k1]) - final32(v, [3,1,2,0], [k1, k0]) - final32(v, [0,0,2,3], [k0, k1]) - final32(v, [1,1,3,2], [k1, k0]) - - XOR(v[0], v[1]) - ADD(reg_hash, v[0]) - - LABEL(after32) - - after16 = Label("after16") - CMP(reg_ptr_len, 16) - JL(after16) - - for i in range(2): - MOV(v[i], [reg_ptr]) - imul(v[i], k2) - ADD(v[i], reg_hash) - - advance(reg_ptr, reg_ptr_len, 8) - - ROR(v[i], 29) - imul(v[i], k3) - - r = GeneralPurposeRegister64() - MOV(r, v[0]) - imul(r, k0) - ROR(r, 21) - ADD(r, v[1]) - XOR(v[0], r) - - MOV(r, v[1]) - imul(r, k3) - ROR(r, 21) - ADD(r, v[0]) - XOR(v[1], r) - - ADD(reg_hash, v[1]) - - LABEL(after16) - - after8 = Label("after8") - CMP(reg_ptr_len, 8) - JL(after8) - - r = GeneralPurposeRegister64() - MOV(r, [reg_ptr]) - imul(r, k3) - ADD(reg_hash, r) - advance(reg_ptr, reg_ptr_len, 8) - - MOV(r, reg_hash) - ROR(r, 55) - imul(r, k1) - XOR(reg_hash, r) - - LABEL(after8) - - after4 = Label("after4") - CMP(reg_ptr_len, 4) - JL(after4) - - r = GeneralPurposeRegister64() - XOR(r, r) - MOV(r.as_dword, dword[reg_ptr]) - imul(r, k3) - ADD(reg_hash, r) - advance(reg_ptr, reg_ptr_len, 4) - - MOV(r, reg_hash) - ROR(r, 26) - imul(r, k1) - XOR(reg_hash, r) - - LABEL(after4) - - after2 = Label("after2") - CMP(reg_ptr_len, 2) - JL(after2) - - r = GeneralPurposeRegister64() - XOR(r,r) - MOV(r.as_word, word[reg_ptr]) - imul(r, k3) - ADD(reg_hash, r) - advance(reg_ptr, reg_ptr_len, 2) - - MOV(r, reg_hash) - ROR(r, 48) - imul(r, k1) - XOR(reg_hash, r) - - LABEL(after2) - - after1 = Label("after1") - CMP(reg_ptr_len, 1) - JL(after1) - - r = GeneralPurposeRegister64() - MOVZX(r, byte[reg_ptr]) - imul(r, k3) - ADD(reg_hash, r) - - MOV(r, reg_hash) - ROR(r, 37) - imul(r, k1) - XOR(reg_hash, r) - - LABEL(after1) - - r = GeneralPurposeRegister64() - 
MOV(r, reg_hash) - ROR(r, 28) - XOR(reg_hash, r) - - imul(reg_hash, k0) - - MOV(r, reg_hash) - ROR(r, 29) - XOR(reg_hash, r) - - RETURN(reg_hash) - -makeHash("Hash64", (buffer_base, buffer_len, buffer_cap, seed)) -makeHash("Hash64Str", (buffer_base, buffer_len, seed)) \ No newline at end of file diff --git a/vendor/github.com/dgryski/go-metro/metro128.go b/vendor/github.com/dgryski/go-metro/metro128.go deleted file mode 100644 index e8dd8ddbf..000000000 --- a/vendor/github.com/dgryski/go-metro/metro128.go +++ /dev/null @@ -1,94 +0,0 @@ -package metro - -import "encoding/binary" - -func rotate_right(v uint64, k uint) uint64 { - return (v >> k) | (v << (64 - k)) -} - -func Hash128(buffer []byte, seed uint64) (uint64, uint64) { - - const ( - k0 = 0xC83A91E1 - k1 = 0x8648DBDB - k2 = 0x7BDEC03B - k3 = 0x2F5870A5 - ) - - ptr := buffer - - var v [4]uint64 - - v[0] = (seed - k0) * k3 - v[1] = (seed + k1) * k2 - - if len(ptr) >= 32 { - v[2] = (seed + k0) * k2 - v[3] = (seed - k1) * k3 - - for len(ptr) >= 32 { - v[0] += binary.LittleEndian.Uint64(ptr) * k0 - ptr = ptr[8:] - v[0] = rotate_right(v[0], 29) + v[2] - v[1] += binary.LittleEndian.Uint64(ptr) * k1 - ptr = ptr[8:] - v[1] = rotate_right(v[1], 29) + v[3] - v[2] += binary.LittleEndian.Uint64(ptr) * k2 - ptr = ptr[8:] - v[2] = rotate_right(v[2], 29) + v[0] - v[3] += binary.LittleEndian.Uint64(ptr) * k3 - ptr = ptr[8:] - v[3] = rotate_right(v[3], 29) + v[1] - } - - v[2] ^= rotate_right(((v[0]+v[3])*k0)+v[1], 21) * k1 - v[3] ^= rotate_right(((v[1]+v[2])*k1)+v[0], 21) * k0 - v[0] ^= rotate_right(((v[0]+v[2])*k0)+v[3], 21) * k1 - v[1] ^= rotate_right(((v[1]+v[3])*k1)+v[2], 21) * k0 - } - - if len(ptr) >= 16 { - v[0] += binary.LittleEndian.Uint64(ptr) * k2 - ptr = ptr[8:] - v[0] = rotate_right(v[0], 33) * k3 - v[1] += binary.LittleEndian.Uint64(ptr) * k2 - ptr = ptr[8:] - v[1] = rotate_right(v[1], 33) * k3 - v[0] ^= rotate_right((v[0]*k2)+v[1], 45) * k1 - v[1] ^= rotate_right((v[1]*k3)+v[0], 45) * k0 - } - - if len(ptr) >= 8 { - v[0] += binary.LittleEndian.Uint64(ptr) * k2 - ptr = ptr[8:] - v[0] = rotate_right(v[0], 33) * k3 - v[0] ^= rotate_right((v[0]*k2)+v[1], 27) * k1 - } - - if len(ptr) >= 4 { - v[1] += uint64(binary.LittleEndian.Uint32(ptr)) * k2 - ptr = ptr[4:] - v[1] = rotate_right(v[1], 33) * k3 - v[1] ^= rotate_right((v[1]*k3)+v[0], 46) * k0 - } - - if len(ptr) >= 2 { - v[0] += uint64(binary.LittleEndian.Uint16(ptr)) * k2 - ptr = ptr[2:] - v[0] = rotate_right(v[0], 33) * k3 - v[0] ^= rotate_right((v[0]*k2)+v[1], 22) * k1 - } - - if len(ptr) >= 1 { - v[1] += uint64(ptr[0]) * k2 - v[1] = rotate_right(v[1], 33) * k3 - v[1] ^= rotate_right((v[1]*k3)+v[0], 58) * k0 - } - - v[0] += rotate_right((v[0]*k0)+v[1], 13) - v[1] += rotate_right((v[1]*k1)+v[0], 37) - v[0] += rotate_right((v[0]*k2)+v[1], 13) - v[1] += rotate_right((v[1]*k3)+v[0], 37) - - return v[0], v[1] -} diff --git a/vendor/github.com/dgryski/go-metro/metro64.go b/vendor/github.com/dgryski/go-metro/metro64.go deleted file mode 100644 index 458a91219..000000000 --- a/vendor/github.com/dgryski/go-metro/metro64.go +++ /dev/null @@ -1,89 +0,0 @@ -//go:build noasm || !amd64 || !gc || purego -// +build noasm !amd64 !gc purego - -package metro - -import ( - "encoding/binary" - "math/bits" -) - -func Hash64(buffer []byte, seed uint64) uint64 { - - const ( - k0 = 0xD6D018F5 - k1 = 0xA2AA033B - k2 = 0x62992FC1 - k3 = 0x30BC5B29 - ) - - ptr := buffer - - hash := (seed + k2) * k0 - - if len(ptr) >= 32 { - v0, v1, v2, v3 := hash, hash, hash, hash - - for len(ptr) >= 32 { - v0 += 
binary.LittleEndian.Uint64(ptr[:8]) * k0 - v0 = bits.RotateLeft64(v0, -29) + v2 - v1 += binary.LittleEndian.Uint64(ptr[8:16]) * k1 - v1 = bits.RotateLeft64(v1, -29) + v3 - v2 += binary.LittleEndian.Uint64(ptr[16:24]) * k2 - v2 = bits.RotateLeft64(v2, -29) + v0 - v3 += binary.LittleEndian.Uint64(ptr[24:32]) * k3 - v3 = bits.RotateLeft64(v3, -29) + v1 - ptr = ptr[32:] - } - - v2 ^= bits.RotateLeft64(((v0+v3)*k0)+v1, -37) * k1 - v3 ^= bits.RotateLeft64(((v1+v2)*k1)+v0, -37) * k0 - v0 ^= bits.RotateLeft64(((v0+v2)*k0)+v3, -37) * k1 - v1 ^= bits.RotateLeft64(((v1+v3)*k1)+v2, -37) * k0 - hash += v0 ^ v1 - } - - if len(ptr) >= 16 { - v0 := hash + (binary.LittleEndian.Uint64(ptr[:8]) * k2) - v0 = bits.RotateLeft64(v0, -29) * k3 - v1 := hash + (binary.LittleEndian.Uint64(ptr[8:16]) * k2) - v1 = bits.RotateLeft64(v1, -29) * k3 - v0 ^= bits.RotateLeft64(v0*k0, -21) + v1 - v1 ^= bits.RotateLeft64(v1*k3, -21) + v0 - hash += v1 - ptr = ptr[16:] - } - - if len(ptr) >= 8 { - hash += binary.LittleEndian.Uint64(ptr[:8]) * k3 - ptr = ptr[8:] - hash ^= bits.RotateLeft64(hash, -55) * k1 - } - - if len(ptr) >= 4 { - hash += uint64(binary.LittleEndian.Uint32(ptr[:4])) * k3 - hash ^= bits.RotateLeft64(hash, -26) * k1 - ptr = ptr[4:] - } - - if len(ptr) >= 2 { - hash += uint64(binary.LittleEndian.Uint16(ptr[:2])) * k3 - ptr = ptr[2:] - hash ^= bits.RotateLeft64(hash, -48) * k1 - } - - if len(ptr) >= 1 { - hash += uint64(ptr[0]) * k3 - hash ^= bits.RotateLeft64(hash, -37) * k1 - } - - hash ^= bits.RotateLeft64(hash, -28) - hash *= k0 - hash ^= bits.RotateLeft64(hash, -29) - - return hash -} - -func Hash64Str(buffer string, seed uint64) uint64 { - return Hash64([]byte(buffer), seed) -} diff --git a/vendor/github.com/dgryski/go-metro/metro_amd64.s b/vendor/github.com/dgryski/go-metro/metro_amd64.s deleted file mode 100644 index f580ab830..000000000 --- a/vendor/github.com/dgryski/go-metro/metro_amd64.s +++ /dev/null @@ -1,373 +0,0 @@ -// +build !noasm -// +build gc -// +build !purego - -// Generated by PeachPy 0.2.0 from metro.py - -// func Hash64(buffer_base uintptr, buffer_len int64, buffer_cap int64, seed uint64) uint64 -TEXT ·Hash64(SB),4,$0-40 - MOVQ seed+24(FP), AX - MOVQ buffer_base+0(FP), BX - MOVQ buffer_len+8(FP), CX - MOVQ $3603962101, DX - IMULQ DX, AX - MOVQ $5961697176435608501, DX - ADDQ DX, AX - CMPQ CX, $32 - JLT after32 - MOVQ AX, DX - MOVQ AX, DI - MOVQ AX, SI - MOVQ AX, BP -loop_begin: - MOVQ 0(BX), R8 - MOVQ $3603962101, R9 - IMULQ R9, R8 - ADDQ R8, DX - RORQ $29, DX - ADDQ SI, DX - MOVQ 8(BX), R8 - MOVQ $2729050939, R9 - IMULQ R9, R8 - ADDQ R8, DI - RORQ $29, DI - ADDQ BP, DI - MOVQ 16(BX), R8 - MOVQ $1654206401, R9 - IMULQ R9, R8 - ADDQ R8, SI - RORQ $29, SI - ADDQ DX, SI - MOVQ 24(BX), R8 - MOVQ $817650473, R9 - IMULQ R9, R8 - ADDQ R8, BP - RORQ $29, BP - ADDQ DI, BP - ADDQ $32, BX - SUBQ $32, CX - CMPQ CX, $32 - JGE loop_begin - MOVQ DX, R8 - ADDQ BP, R8 - MOVQ $3603962101, R9 - IMULQ R9, R8 - ADDQ DI, R8 - RORQ $37, R8 - MOVQ $2729050939, R9 - IMULQ R9, R8 - XORQ R8, SI - MOVQ DI, R8 - ADDQ SI, R8 - MOVQ $2729050939, R9 - IMULQ R9, R8 - ADDQ DX, R8 - RORQ $37, R8 - MOVQ $3603962101, R9 - IMULQ R9, R8 - XORQ R8, BP - MOVQ DX, R8 - ADDQ SI, R8 - MOVQ $3603962101, R9 - IMULQ R9, R8 - ADDQ BP, R8 - RORQ $37, R8 - MOVQ $2729050939, R9 - IMULQ R9, R8 - XORQ R8, DX - MOVQ DI, R8 - ADDQ BP, R8 - MOVQ $2729050939, BP - IMULQ BP, R8 - ADDQ SI, R8 - RORQ $37, R8 - MOVQ $3603962101, SI - IMULQ SI, R8 - XORQ R8, DI - XORQ DI, DX - ADDQ DX, AX -after32: - CMPQ CX, $16 - JLT after16 - MOVQ 0(BX), DX - MOVQ 
$1654206401, DI - IMULQ DI, DX - ADDQ AX, DX - ADDQ $8, BX - SUBQ $8, CX - RORQ $29, DX - MOVQ $817650473, DI - IMULQ DI, DX - MOVQ 0(BX), DI - MOVQ $1654206401, SI - IMULQ SI, DI - ADDQ AX, DI - ADDQ $8, BX - SUBQ $8, CX - RORQ $29, DI - MOVQ $817650473, SI - IMULQ SI, DI - MOVQ DX, SI - MOVQ $3603962101, BP - IMULQ BP, SI - RORQ $21, SI - ADDQ DI, SI - XORQ SI, DX - MOVQ DI, SI - MOVQ $817650473, BP - IMULQ BP, SI - RORQ $21, SI - ADDQ DX, SI - XORQ SI, DI - ADDQ DI, AX -after16: - CMPQ CX, $8 - JLT after8 - MOVQ 0(BX), DX - MOVQ $817650473, DI - IMULQ DI, DX - ADDQ DX, AX - ADDQ $8, BX - SUBQ $8, CX - MOVQ AX, DX - RORQ $55, DX - MOVQ $2729050939, DI - IMULQ DI, DX - XORQ DX, AX -after8: - CMPQ CX, $4 - JLT after4 - XORQ DX, DX - MOVL 0(BX), DX - MOVQ $817650473, DI - IMULQ DI, DX - ADDQ DX, AX - ADDQ $4, BX - SUBQ $4, CX - MOVQ AX, DX - RORQ $26, DX - MOVQ $2729050939, DI - IMULQ DI, DX - XORQ DX, AX -after4: - CMPQ CX, $2 - JLT after2 - XORQ DX, DX - MOVW 0(BX), DX - MOVQ $817650473, DI - IMULQ DI, DX - ADDQ DX, AX - ADDQ $2, BX - SUBQ $2, CX - MOVQ AX, DX - RORQ $48, DX - MOVQ $2729050939, DI - IMULQ DI, DX - XORQ DX, AX -after2: - CMPQ CX, $1 - JLT after1 - MOVBQZX 0(BX), BX - MOVQ $817650473, CX - IMULQ CX, BX - ADDQ BX, AX - MOVQ AX, BX - RORQ $37, BX - MOVQ $2729050939, CX - IMULQ CX, BX - XORQ BX, AX -after1: - MOVQ AX, BX - RORQ $28, BX - XORQ BX, AX - MOVQ $3603962101, BX - IMULQ BX, AX - MOVQ AX, BX - RORQ $29, BX - XORQ BX, AX - MOVQ AX, ret+32(FP) - RET - -// func Hash64Str(buffer_base uintptr, buffer_len int64, seed uint64) uint64 -TEXT ·Hash64Str(SB),4,$0-32 - MOVQ seed+16(FP), AX - MOVQ buffer_base+0(FP), BX - MOVQ buffer_len+8(FP), CX - MOVQ $3603962101, DX - IMULQ DX, AX - MOVQ $5961697176435608501, DX - ADDQ DX, AX - CMPQ CX, $32 - JLT after32 - MOVQ AX, DX - MOVQ AX, DI - MOVQ AX, SI - MOVQ AX, BP -loop_begin: - MOVQ 0(BX), R8 - MOVQ $3603962101, R9 - IMULQ R9, R8 - ADDQ R8, DX - RORQ $29, DX - ADDQ SI, DX - MOVQ 8(BX), R8 - MOVQ $2729050939, R9 - IMULQ R9, R8 - ADDQ R8, DI - RORQ $29, DI - ADDQ BP, DI - MOVQ 16(BX), R8 - MOVQ $1654206401, R9 - IMULQ R9, R8 - ADDQ R8, SI - RORQ $29, SI - ADDQ DX, SI - MOVQ 24(BX), R8 - MOVQ $817650473, R9 - IMULQ R9, R8 - ADDQ R8, BP - RORQ $29, BP - ADDQ DI, BP - ADDQ $32, BX - SUBQ $32, CX - CMPQ CX, $32 - JGE loop_begin - MOVQ DX, R8 - ADDQ BP, R8 - MOVQ $3603962101, R9 - IMULQ R9, R8 - ADDQ DI, R8 - RORQ $37, R8 - MOVQ $2729050939, R9 - IMULQ R9, R8 - XORQ R8, SI - MOVQ DI, R8 - ADDQ SI, R8 - MOVQ $2729050939, R9 - IMULQ R9, R8 - ADDQ DX, R8 - RORQ $37, R8 - MOVQ $3603962101, R9 - IMULQ R9, R8 - XORQ R8, BP - MOVQ DX, R8 - ADDQ SI, R8 - MOVQ $3603962101, R9 - IMULQ R9, R8 - ADDQ BP, R8 - RORQ $37, R8 - MOVQ $2729050939, R9 - IMULQ R9, R8 - XORQ R8, DX - MOVQ DI, R8 - ADDQ BP, R8 - MOVQ $2729050939, BP - IMULQ BP, R8 - ADDQ SI, R8 - RORQ $37, R8 - MOVQ $3603962101, SI - IMULQ SI, R8 - XORQ R8, DI - XORQ DI, DX - ADDQ DX, AX -after32: - CMPQ CX, $16 - JLT after16 - MOVQ 0(BX), DX - MOVQ $1654206401, DI - IMULQ DI, DX - ADDQ AX, DX - ADDQ $8, BX - SUBQ $8, CX - RORQ $29, DX - MOVQ $817650473, DI - IMULQ DI, DX - MOVQ 0(BX), DI - MOVQ $1654206401, SI - IMULQ SI, DI - ADDQ AX, DI - ADDQ $8, BX - SUBQ $8, CX - RORQ $29, DI - MOVQ $817650473, SI - IMULQ SI, DI - MOVQ DX, SI - MOVQ $3603962101, BP - IMULQ BP, SI - RORQ $21, SI - ADDQ DI, SI - XORQ SI, DX - MOVQ DI, SI - MOVQ $817650473, BP - IMULQ BP, SI - RORQ $21, SI - ADDQ DX, SI - XORQ SI, DI - ADDQ DI, AX -after16: - CMPQ CX, $8 - JLT after8 - MOVQ 0(BX), DX - MOVQ $817650473, DI - 
IMULQ DI, DX - ADDQ DX, AX - ADDQ $8, BX - SUBQ $8, CX - MOVQ AX, DX - RORQ $55, DX - MOVQ $2729050939, DI - IMULQ DI, DX - XORQ DX, AX -after8: - CMPQ CX, $4 - JLT after4 - XORQ DX, DX - MOVL 0(BX), DX - MOVQ $817650473, DI - IMULQ DI, DX - ADDQ DX, AX - ADDQ $4, BX - SUBQ $4, CX - MOVQ AX, DX - RORQ $26, DX - MOVQ $2729050939, DI - IMULQ DI, DX - XORQ DX, AX -after4: - CMPQ CX, $2 - JLT after2 - XORQ DX, DX - MOVW 0(BX), DX - MOVQ $817650473, DI - IMULQ DI, DX - ADDQ DX, AX - ADDQ $2, BX - SUBQ $2, CX - MOVQ AX, DX - RORQ $48, DX - MOVQ $2729050939, DI - IMULQ DI, DX - XORQ DX, AX -after2: - CMPQ CX, $1 - JLT after1 - MOVBQZX 0(BX), BX - MOVQ $817650473, CX - IMULQ CX, BX - ADDQ BX, AX - MOVQ AX, BX - RORQ $37, BX - MOVQ $2729050939, CX - IMULQ CX, BX - XORQ BX, AX -after1: - MOVQ AX, BX - RORQ $28, BX - XORQ BX, AX - MOVQ $3603962101, BX - IMULQ BX, AX - MOVQ AX, BX - RORQ $29, BX - XORQ BX, AX - MOVQ AX, ret+24(FP) - RET diff --git a/vendor/github.com/dgryski/go-metro/metro_stub.go b/vendor/github.com/dgryski/go-metro/metro_stub.go deleted file mode 100644 index ccb97f11f..000000000 --- a/vendor/github.com/dgryski/go-metro/metro_stub.go +++ /dev/null @@ -1,10 +0,0 @@ -//go:build !noasm && amd64 && gc && !purego -// +build !noasm,amd64,gc,!purego - -package metro - -//go:generate python -m peachpy.x86_64 metro.py -S -o metro_amd64.s -mabi=goasm -//go:noescape - -func Hash64(buffer []byte, seed uint64) uint64 -func Hash64Str(buffer string, seed uint64) uint64 diff --git a/vendor/github.com/greatroar/blobloom/.gitattributes b/vendor/github.com/greatroar/blobloom/.gitattributes new file mode 100644 index 000000000..5ce45357e --- /dev/null +++ b/vendor/github.com/greatroar/blobloom/.gitattributes @@ -0,0 +1,2 @@ +# Work around https://github.com/golang/go/issues/52268. +**/testdata/fuzz/*/* eol=lf diff --git a/vendor/github.com/greatroar/blobloom/.golangci.yml b/vendor/github.com/greatroar/blobloom/.golangci.yml new file mode 100644 index 000000000..cf8c53f19 --- /dev/null +++ b/vendor/github.com/greatroar/blobloom/.golangci.yml @@ -0,0 +1,25 @@ +# Configuration for golangci-lint. + +linters: + disable: + - asciicheck + enable: + - gocognit + - gocyclo + - godot + - gofumpt + - lll + - misspell + - nakedret + - thelper + +issues: + exclude-rules: + - path: _test\.go + linters: + errcheck + +linters-settings: + govet: + enable: + - atomicalign diff --git a/vendor/github.com/greatroar/blobloom/LICENSE b/vendor/github.com/greatroar/blobloom/LICENSE new file mode 100644 index 000000000..d64569567 --- /dev/null +++ b/vendor/github.com/greatroar/blobloom/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/greatroar/blobloom/README.md b/vendor/github.com/greatroar/blobloom/README.md new file mode 100644 index 000000000..d615f2b9b --- /dev/null +++ b/vendor/github.com/greatroar/blobloom/README.md @@ -0,0 +1,86 @@ +Blobloom +======== + +A Bloom filter package for Go (golang) with no compile-time dependencies. 
+ +This package implements a version of Bloom filters called [blocked Bloom filters]( +https://algo2.iti.kit.edu/documents/cacheefficientbloomfilters-jea.pdf), +which get a speed boost from using the CPU cache more efficiently +than regular Bloom filters. + +Unlike most Bloom filter packages for Go, +this one doesn't run a hash function for you. +That's a benefit if you need a custom hash +or you want pick the fastest one for an application. + +Usage +----- + +To construct a Bloom filter, you need to know how many keys you want to store +and what rate of false positives you find acceptable. + + f := blobloom.NewOptimized(blobloom.Config{ + Capacity: nkeys, // Expected number of keys. + FPRate: 1e-4, // Accept one false positive per 10,000 lookups. + }) + +To add a key: + + // import "github.com/cespare/xxhash/v2" + f.Add(xxhash.Sum64(key)) + +To test for the presence of a key in the filter: + + if f.Has(xxhash.Sum64(key)) { + // Key is probably in f. + } else { + // Key is certainly not in f. + } + +The false positive rate is defined as usual: +if you look up 10,000 random keys in a Bloom filter filled to capacity, +an expected one of those is a false positive for FPRate 1e-4. + +See the examples/ directory and the +[package documentation](https://pkg.go.dev/github.com/greatroar/blobloom) +for further usage information and examples. + +Hash functions +-------------- + +Blobloom does not provide hash functions. Instead, it requires client code to +represent each key as a single 64-bit hash value, leaving it to the user to +pick the right hash function for a particular problem. Here are some general +suggestions: + +* If you use Bloom filters to speed up access to a key-value store, you might +want to look at [xxh3](https://github.com/zeebo/xxh3) or [xxhash]( +https://github.com/cespare/xxhash). +* If your keys are cryptographic hashes, consider using the first 8 bytes of those hashes. +* If you use Bloom filters to make probabilistic decisions, a randomized hash +function such as [maphash](https://golang.org/pkg/hash/maphash) should prevent +the same false positives occurring every time. + +When evaluating a hash function, or designing a custom one, +make sure it is a 64-bit hash that properly mixes its input bits. +Casting a 32-bit hash to uint64 gives suboptimal results. +So does passing integer keys in without running them through a mixing function. + + + +License +------- + +Copyright © 2020-2023 the Blobloom authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/vendor/github.com/greatroar/blobloom/bloomfilter.go b/vendor/github.com/greatroar/blobloom/bloomfilter.go new file mode 100644 index 000000000..78f09de00 --- /dev/null +++ b/vendor/github.com/greatroar/blobloom/bloomfilter.go @@ -0,0 +1,279 @@ +// Copyright 2020-2022 the Blobloom authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package blobloom implements blocked Bloom filters. +// +// Blocked Bloom filters are an approximate set data structure: if a key has +// been added to a filter, a lookup of that key returns true, but if the key +// has not been added, there is a non-zero probability that the lookup still +// returns true (a false positive). False negatives are impossible: if the +// lookup for a key returns false, that key has not been added. +// +// In this package, keys are represented exclusively as hashes. Client code +// is responsible for supplying a 64-bit hash value. +// +// Compared to standard Bloom filters, blocked Bloom filters use the CPU +// cache more efficiently. A blocked Bloom filter is an array of ordinary +// Bloom filters of fixed size BlockBits (the blocks). The lower half of the +// hash selects the block to use. +// +// To achieve the same false positive rate (FPR) as a standard Bloom filter, +// a blocked Bloom filter requires more memory. For an FPR of at most 2e-6 +// (two in a million), it uses ~20% more memory. At 1e-10, the space required +// is double that of standard Bloom filter. +// +// For more details, see the 2010 paper by Putze, Sanders and Singler, +// https://algo2.iti.kit.edu/documents/cacheefficientbloomfilters-jea.pdf. +package blobloom + +import "math" + +// BlockBits is the number of bits per block and the minimum number of bits +// in a Filter. +// +// The value of this constant is chosen to match the L1 cache line size +// of popular architectures (386, amd64, arm64). +const BlockBits = 512 + +// MaxBits is the maximum number of bits supported by a Filter. +const MaxBits = BlockBits << 32 // 256GiB. + +// A Filter is a blocked Bloom filter. +type Filter struct { + b []block // Shards. + k int // Number of hash functions required. +} + +// New constructs a Bloom filter with given numbers of bits and hash functions. +// +// The number of bits should be at least BlockBits; smaller values are silently +// increased. +// +// The number of hashes reflects the number of hashes synthesized from the +// single hash passed in by the client. It is silently increased to two if +// a lower value is given. +func New(nbits uint64, nhashes int) *Filter { + nbits, nhashes = fixBitsAndHashes(nbits, nhashes) + + return &Filter{ + b: make([]block, nbits/BlockBits), + k: nhashes, + } +} + +func fixBitsAndHashes(nbits uint64, nhashes int) (uint64, int) { + if nbits < 1 { + nbits = BlockBits + } + if nhashes < 2 { + nhashes = 2 + } + if nbits > MaxBits { + panic("nbits exceeds MaxBits") + } + + // Round nbits up to a multiple of BlockBits. + if nbits%BlockBits != 0 { + nbits += BlockBits - nbits%BlockBits + } + + return nbits, nhashes +} + +// Add insert a key with hash value h into f. +func (f *Filter) Add(h uint64) { + h1, h2 := uint32(h>>32), uint32(h) + b := getblock(f.b, h2) + + for i := 1; i < f.k; i++ { + h1, h2 = doublehash(h1, h2, i) + b.setbit(h1) + } +} + +// log(1 - 1/BlockBits) computed with 128 bits precision. 
+// Note that this is extremely close to -1/BlockBits, +// which is what Wikipedia would have us use: +// https://en.wikipedia.org/wiki/Bloom_filter#Approximating_the_number_of_items_in_a_Bloom_filter. +const log1minus1divBlockbits = -0.0019550348358033505576274922418668121377 + +// Cardinality estimates the number of distinct keys added to f. +// +// The estimate is most reliable when f is filled to roughly its capacity. +// It gets worse as f gets more densely filled. When one of the blocks is +// entirely filled, the estimate becomes +Inf. +// +// The return value is the maximum likelihood estimate of Papapetrou, Siberski +// and Nejdl, summed over the blocks +// (https://www.win.tue.nl/~opapapetrou/papers/Bloomfilters-DAPD.pdf). +func (f *Filter) Cardinality() float64 { + return cardinality(f.k, f.b, onescount) +} + +func cardinality(nhashes int, b []block, onescount func(*block) int) float64 { + k := float64(nhashes - 1) + + // The probability of some bit not being set in a single insertion is + // p0 = (1-1/BlockBits)^k. + // + // logProb0Inv = 1 / log(p0) = 1 / (k*log(1-1/BlockBits)). + logProb0Inv := 1 / (k * log1minus1divBlockbits) + + var n float64 + for i := range b { + ones := onescount(&b[i]) + if ones == 0 { + continue + } + n += math.Log1p(-float64(ones) / BlockBits) + } + return n * logProb0Inv +} + +// Clear resets f to its empty state. +func (f *Filter) Clear() { + for i := 0; i < len(f.b); i++ { + f.b[i] = block{} + } +} + +// Empty reports whether f contains no keys. +func (f *Filter) Empty() bool { + for i := 0; i < len(f.b); i++ { + if f.b[i] != (block{}) { + return false + } + } + return true +} + +// Equals returns true if f and g contain the same keys (in terms of Has) +// when used with the same hash function. +func (f *Filter) Equals(g *Filter) bool { + if g.k != f.k || len(g.b) != len(f.b) { + return false + } + for i := range g.b { + if f.b[i] != g.b[i] { + return false + } + } + return true +} + +// Fill set f to a completely full filter. +// After Fill, Has returns true for any key. +func (f *Filter) Fill() { + for i := 0; i < len(f.b); i++ { + for j := 0; j < blockWords; j++ { + f.b[i][j] = ^uint32(0) + } + } +} + +// Has reports whether a key with hash value h has been added. +// It may return a false positive. +func (f *Filter) Has(h uint64) bool { + h1, h2 := uint32(h>>32), uint32(h) + b := getblock(f.b, h2) + + for i := 1; i < f.k; i++ { + h1, h2 = doublehash(h1, h2, i) + if !b.getbit(h1) { + return false + } + } + return true +} + +// doublehash generates the hash values to use in iteration i of +// enhanced double hashing from the values h1, h2 of the previous iteration. +// See https://www.ccs.neu.edu/home/pete/pub/bloom-filters-verification.pdf. +func doublehash(h1, h2 uint32, i int) (uint32, uint32) { + h1 = h1 + h2 + h2 = h2 + uint32(i) + return h1, h2 +} + +// NumBits returns the number of bits of f. +func (f *Filter) NumBits() uint64 { + return BlockBits * uint64(len(f.b)) +} + +func checkBinop(f, g *Filter) { + if len(f.b) != len(g.b) { + panic("Bloom filters do not have the same number of bits") + } + if f.k != g.k { + panic("Bloom filters do not have the same number of hash functions") + } +} + +// Intersect sets f to the intersection of f and g. +// +// Intersect panics when f and g do not have the same number of bits and +// hash functions. Both Filters must be using the same hash function(s), +// but Intersect cannot check this. 
+// +// Since Bloom filters may return false positives, Has may return true for +// a key that was not in both f and g. +// +// After Intersect, the estimates from f.Cardinality and f.FPRate should be +// considered unreliable. +func (f *Filter) Intersect(g *Filter) { + checkBinop(f, g) + f.intersect(g) +} + +// Union sets f to the union of f and g. +// +// Union panics when f and g do not have the same number of bits and +// hash functions. Both Filters must be using the same hash function(s), +// but Union cannot check this. +func (f *Filter) Union(g *Filter) { + checkBinop(f, g) + f.union(g) +} + +const ( + wordSize = 32 + blockWords = BlockBits / wordSize +) + +// A block is a fixed-size Bloom filter, used as a shard of a Filter. +type block [blockWords]uint32 + +func getblock(b []block, h2 uint32) *block { + i := reducerange(h2, uint32(len(b))) + return &b[i] +} + +// reducerange maps i to an integer in the range [0,n). +// https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ +func reducerange(i, n uint32) uint32 { + return uint32((uint64(i) * uint64(n)) >> 32) +} + +// getbit reports whether bit (i modulo BlockBits) is set. +func (b *block) getbit(i uint32) bool { + bit := uint32(1) << (i % wordSize) + x := (*b)[(i/wordSize)%blockWords] & bit + return x != 0 +} + +// setbit sets bit (i modulo BlockBits) of b. +func (b *block) setbit(i uint32) { + bit := uint32(1) << (i % wordSize) + (*b)[(i/wordSize)%blockWords] |= bit +} diff --git a/vendor/github.com/greatroar/blobloom/io.go b/vendor/github.com/greatroar/blobloom/io.go new file mode 100644 index 000000000..df104d9e9 --- /dev/null +++ b/vendor/github.com/greatroar/blobloom/io.go @@ -0,0 +1,246 @@ +// Copyright 2023 the Blobloom authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package blobloom + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "io" + "strings" + "sync/atomic" +) + +const maxCommentLen = 44 + +// Dump writes f to w, with an optional comment string, in the binary format +// that a Loader accepts. It returns the number of bytes written to w. +// +// The comment may contain arbitrary data, within the limits layed out by the +// format description. It can be used to record the hash function to be used +// with a Filter. +func Dump(w io.Writer, f *Filter, comment string) (int64, error) { + return dump(w, f.b, f.k, comment) +} + +// DumpSync is like Dump, but for SyncFilters. +// +// If other goroutines are simultaneously modifying f, +// their modifications may not be reflected in the dump. +// Separate synchronization is required to prevent this. +// +// The format produced is the same as Dump's. The fact that +// the argument is a SyncFilter is not encoded in the dump. 
+func DumpSync(w io.Writer, f *SyncFilter, comment string) (n int64, err error) { + return dump(w, f.b, f.k, comment) +} + +func dump(w io.Writer, b []block, nhashes int, comment string) (n int64, err error) { + switch { + case len(b) == 0 || nhashes == 0: + err = errors.New("blobloom: won't dump uninitialized Filter") + case len(comment) > maxCommentLen: + err = fmt.Errorf("blobloom: comment of length %d too long", len(comment)) + case strings.IndexByte(comment, 0) != -1: + err = fmt.Errorf("blobloom: comment %q contains zero byte", len(comment)) + } + if err != nil { + return 0, err + } + + var buf [64]byte + copy(buf[:8], "blobloom") + // As documented in the comment for Loader, we store one less than the + // number of blocks. This way, we can use the otherwise invalid value 0 + // and store 2³² blocks instead of at most 2³²-1. + binary.LittleEndian.PutUint32(buf[12:], uint32(len(b)-1)) + binary.LittleEndian.PutUint32(buf[16:], uint32(nhashes)) + copy(buf[20:], comment) + + k, err := w.Write(buf[:]) + n = int64(k) + if err != nil { + return n, err + } + + for i := range b { + for j := range b[i] { + x := atomic.LoadUint32(&b[i][j]) + binary.LittleEndian.PutUint32(buf[4*j:], x) + } + k, err = w.Write(buf[:]) + n += int64(k) + if err != nil { + break + } + } + + return n, err +} + +// A Loader reads a Filter or SyncFilter from an io.Reader. +// +// A Loader accepts the binary format produced by Dump. The format starts +// with a 64-byte header: +// - the string "blobloom", in ASCII; +// - a four-byte version number, which must be zero; +// - the number of Bloom filter blocks, minus one, as a 32-bit integer; +// - the number of hashes, as a 32-bit integer; +// - a comment of at most 44 non-zero bytes, padded to 44 bytes with zeros. +// +// After the header come the 512-bit blocks, divided into sixteen 32-bit limbs. +// All integers are little-endian. +type Loader struct { + buf [64]byte + r io.Reader + err error + + Comment string // Comment field. Filled in by NewLoader. + nblocks uint64 + nhashes int +} + +// NewLoader parses the format header from r and returns a Loader +// that can be used to load a Filter from it. +func NewLoader(r io.Reader) (*Loader, error) { + l := &Loader{r: r} + + err := l.fillbuf() + if err != nil { + return nil, err + } + + version := binary.LittleEndian.Uint32(l.buf[8:]) + // See comment in dump for the +1. + l.nblocks = 1 + uint64(binary.LittleEndian.Uint32(l.buf[12:])) + l.nhashes = int(binary.LittleEndian.Uint32(l.buf[16:])) + comment := l.buf[20:] + + switch { + case string(l.buf[:8]) != "blobloom": + err = errors.New("blobloom: not a Bloom filter dump") + case version != 0: + err = errors.New("blobloom: unsupported dump version") + case l.nhashes == 0: + err = errors.New("blobloom: zero hashes in Bloom filter dump") + } + if err == nil { + comment, err = checkComment(comment) + l.Comment = string(comment) + } + + if err != nil { + l = nil + } + return l, err +} + +// Load sets f to the union of f and the Loader's filter, then returns f. +// If f is nil, a new Filter of the appropriate size is constructed. +// +// If f is not nil and an error occurs while reading from the Loader, +// f may end up in an inconsistent state. 
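+//
+// A typical round trip (sketch; error handling abbreviated, file name is a
+// placeholder) reads back a filter written by Dump:
+//
+//	r, err := os.Open("filter.dump")
+//	if err != nil {
+//		// handle error
+//	}
+//	l, err := blobloom.NewLoader(r)
+//	if err != nil {
+//		// handle error
+//	}
+//	f, err := l.Load(nil) // nil allocates a new Filter of the dumped size
+//	// l.Comment now holds the comment string that was passed to Dump.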
+func (l *Loader) Load(f *Filter) (*Filter, error) { + if f == nil { + nbits := BlockBits * l.nblocks + if nbits > MaxBits { + return nil, fmt.Errorf("blobloom: %d blocks is too large", l.nblocks) + } + f = New(nbits, int(l.nhashes)) + } else if err := l.checkBitsAndHashes(len(f.b), f.k); err != nil { + return nil, err + } + + for i := range f.b { + if err := l.fillbuf(); err != nil { + return nil, err + } + + for j := range f.b[i] { + f.b[i][j] |= binary.LittleEndian.Uint32(l.buf[4*j:]) + } + } + + return f, nil +} + +// Load sets f to the union of f and the Loader's filter, then returns f. +// If f is nil, a new SyncFilter of the appropriate size is constructed. +// Else, LoadSync may run concurrently with other modifications to f. +// +// If f is not nil and an error occurs while reading from the Loader, +// f may end up in an inconsistent state. +func (l *Loader) LoadSync(f *SyncFilter) (*SyncFilter, error) { + if f == nil { + nbits := BlockBits * l.nblocks + if nbits > MaxBits { + return nil, fmt.Errorf("blobloom: %d blocks is too large", l.nblocks) + } + f = NewSync(nbits, int(l.nhashes)) + } else if err := l.checkBitsAndHashes(len(f.b), f.k); err != nil { + return nil, err + } + + for i := range f.b { + if err := l.fillbuf(); err != nil { + return nil, err + } + + for j := range f.b[i] { + p := &f.b[i][j] + x := binary.LittleEndian.Uint32(l.buf[4*j:]) + + for { + old := atomic.LoadUint32(p) + if atomic.CompareAndSwapUint32(p, old, old|x) { + break + } + } + } + } + + return f, nil +} + +func (l *Loader) checkBitsAndHashes(nblocks, nhashes int) error { + switch { + case nblocks != int(l.nblocks): + return fmt.Errorf("blobloom: Filter has %d blocks, but dump has %d", nblocks, l.nblocks) + case nhashes != l.nhashes: + return fmt.Errorf("blobloom: Filter has %d hashes, but dump has %d", nhashes, l.nhashes) + } + return nil +} + +func (l *Loader) fillbuf() error { + _, err := io.ReadFull(l.r, l.buf[:]) + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return err +} + +func checkComment(p []byte) ([]byte, error) { + eos := bytes.IndexByte(p, 0) + if eos != -1 { + tail := p[eos+1:] + if !bytes.Equal(tail, make([]byte, len(tail))) { + return nil, fmt.Errorf("blobloom: comment block %q contains zero byte", p) + } + p = p[:eos] + } + return p, nil +} diff --git a/vendor/github.com/greatroar/blobloom/optimize.go b/vendor/github.com/greatroar/blobloom/optimize.go new file mode 100644 index 000000000..0497e7796 --- /dev/null +++ b/vendor/github.com/greatroar/blobloom/optimize.go @@ -0,0 +1,201 @@ +// Copyright 2020 the Blobloom authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package blobloom + +import "math" + +// A Config holds parameters for Optimize or NewOptimized. +type Config struct { + // Trigger the "contains filtered or unexported fields" message for + // forward compatibility and force the caller to use named fields. + _ struct{} + + // Capacity is the expected number of distinct keys to be added. 
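+	// For example, Capacity: 10_000_000 for a filter expected to hold
+	// roughly ten million keys (illustrative value).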
+ // More keys can always be added, but the false positive rate can be + // expected to drop below FPRate if their number exceeds the Capacity. + Capacity uint64 + + // Desired lower bound on the false positive rate when the Bloom filter + // has been filled to its capacity. FPRate must be between zero + // (exclusive) and one (inclusive). + FPRate float64 + + // Maximum size of the Bloom filter in bits. Zero means the global + // MaxBits constant. A value less than BlockBits means BlockBits. + MaxBits uint64 +} + +// NewOptimized is shorthand for New(Optimize(config)). +func NewOptimized(config Config) *Filter { + return New(Optimize(config)) +} + +// NewSyncOptimized is shorthand for New(Optimize(config)). +func NewSyncOptimized(config Config) *SyncFilter { + return NewSync(Optimize(config)) +} + +// Optimize returns numbers of keys and hash functions that achieve the +// desired false positive described by config. +// +// Optimize panics when config.FPRate is invalid. +// +// The estimated number of bits is imprecise for false positives rates below +// ca. 1e-15. +func Optimize(config Config) (nbits uint64, nhashes int) { + n := float64(config.Capacity) + p := config.FPRate + + if p <= 0 || p > 1 { + panic("false positive rate for a Bloom filter must be > 0, <= 1") + } + if n == 0 { + // Assume the client wants to add at least one key; log2(0) = -inf. + n = 1 + } + + // The optimal nbits/n is c = -log2(p) / ln(2) for a vanilla Bloom filter. + c := math.Ceil(-math.Log2(p) / math.Ln2) + if c < float64(len(correctC)) { + c = float64(correctC[int(c)]) + } else { + // We can't achieve the desired FPR. Just triple the number of bits. + c *= 3 + } + nbits = uint64(c * n) + + // Round up to a multiple of BlockBits. + if nbits%BlockBits != 0 { + nbits += BlockBits - nbits%BlockBits + } + + var maxbits uint64 = MaxBits + if config.MaxBits != 0 && config.MaxBits < maxbits { + maxbits = config.MaxBits + if maxbits < BlockBits { + maxbits = BlockBits + } + } + if nbits > maxbits { + nbits = maxbits + // Round down to a multiple of BlockBits. + nbits -= nbits % BlockBits + } + + // The corresponding optimal number of hash functions is k = c * log(2). + // Try rounding up and down to see which rounding is better. + c = float64(nbits) / n + k := c * math.Ln2 + if k < 1 { + nhashes = 1 + return nbits, nhashes + } + + ceilK, floorK := math.Floor(k), math.Ceil(k) + if ceilK == floorK { + return nbits, int(ceilK) + } + + fprCeil, _ := fpRate(c, math.Ceil(k)) + fprFloor, _ := fpRate(c, math.Floor(k)) + if fprFloor < fprCeil { + k = floorK + } else { + k = ceilK + } + + return nbits, int(k) +} + +// correctC maps c = m/n for a vanilla Bloom filter to the c' for a +// blocked Bloom filter. +// +// This is Putze et al.'s Table I, extended down to zero. +// For c > 34, the values become huge and are hard to compute. +var correctC = []byte{ + 1, 1, 2, 4, 5, + 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 18, 20, 21, 23, + 25, 26, 28, 30, 32, 35, 38, 40, 44, 48, 51, 58, 64, 74, 90, +} + +// FPRate computes an estimate of the false positive rate of a Bloom filter +// after nkeys distinct keys have been added. +func FPRate(nkeys, nbits uint64, nhashes int) float64 { + if nkeys == 0 { + return 0 + } + p, _ := fpRate(float64(nbits)/float64(nkeys), float64(nhashes)) + return p +} + +func fpRate(c, k float64) (p float64, iter int) { + switch { + case c == 0: + panic("0 bits per key is too few") + case k == 0: + panic("0 hashes is too few") + } + + // Putze et al.'s Equation (3). 
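+	// In the notation used here (with B = BlockBits), the quantity computed
+	// below is
+	//
+	//	p ≈ Σ_j Poisson(j; B/c) · (1 − e^(−k·j/B))^k,
+	//
+	// the false positive rate of one block that received j keys, weighted by
+	// the probability that a block receives j of them.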
+ // + // The Poisson distribution has a single spike around its mean + // BlockBits/c that gets slimmer and further away from zero as c tends + // to zero (the Bloom filter gets more filled). We start at the mean, + // then add terms left and right of it until their relative contribution + // drops below ε. + const ε = 1e-9 + mean := BlockBits / c + + // Ceil to make sure we start at one, not zero. + i := math.Ceil(mean) + p = math.Exp(logPoisson(mean, i) + logFprBlock(BlockBits/i, k)) + + for j := i - 1; j > 0; j-- { + add := math.Exp(logPoisson(mean, j) + logFprBlock(BlockBits/j, k)) + p += add + iter++ + if add/p < ε { + break + } + } + + for j := i + 1; ; j++ { + add := math.Exp(logPoisson(mean, j) + logFprBlock(BlockBits/j, k)) + p += add + iter++ + if add/p < ε { + break + } + } + + return p, iter +} + +// FPRate computes an estimate of f's false positive rate after nkeys distinct +// keys have been added. +func (f *Filter) FPRate(nkeys uint64) float64 { + return FPRate(nkeys, f.NumBits(), f.k) +} + +// Log of the FPR of a single block, FPR = (1 - exp(-k/c))^k. +func logFprBlock(c, k float64) float64 { + return k * math.Log1p(-math.Exp(-k/c)) +} + +// Log of the Poisson distribution's pmf. +func logPoisson(λ, k float64) float64 { + lg, _ := math.Lgamma(k + 1) + return k*math.Log(λ) - λ - lg +} diff --git a/vendor/github.com/greatroar/blobloom/setop_64bit.go b/vendor/github.com/greatroar/blobloom/setop_64bit.go new file mode 100644 index 000000000..b5880380a --- /dev/null +++ b/vendor/github.com/greatroar/blobloom/setop_64bit.go @@ -0,0 +1,148 @@ +// Copyright 2020-2022 the Blobloom authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build (amd64 || arm64) && !nounsafe +// +build amd64 arm64 +// +build !nounsafe + +package blobloom + +import ( + "math/bits" + "sync/atomic" + "unsafe" +) + +// Block reinterpreted as array of uint64. 
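+// The casts below view the same 64 bytes as eight uint64 words instead of
+// sixteen uint32 words; bitwise AND, OR and popcount are unaffected by the
+// word width, so the results match the portable uint32 implementation.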
+type block64 [BlockBits / 64]uint64 + +func (f *Filter) intersect(g *Filter) { + a, b := f.b, g.b + for len(a) >= 2 && len(b) >= 2 { + p := (*block64)(unsafe.Pointer(&a[0])) + q := (*block64)(unsafe.Pointer(&b[0])) + + p[0] &= q[0] + p[1] &= q[1] + p[2] &= q[2] + p[3] &= q[3] + p[4] &= q[4] + p[5] &= q[5] + p[6] &= q[6] + p[7] &= q[7] + + p = (*block64)(unsafe.Pointer(&a[1])) + q = (*block64)(unsafe.Pointer(&b[1])) + + p[0] &= q[0] + p[1] &= q[1] + p[2] &= q[2] + p[3] &= q[3] + p[4] &= q[4] + p[5] &= q[5] + p[6] &= q[6] + p[7] &= q[7] + + a, b = a[2:], b[2:] + } + + if len(a) > 0 && len(b) > 0 { + p := (*block64)(unsafe.Pointer(&a[0])) + q := (*block64)(unsafe.Pointer(&b[0])) + + p[0] &= q[0] + p[1] &= q[1] + p[2] &= q[2] + p[3] &= q[3] + p[4] &= q[4] + p[5] &= q[5] + p[6] &= q[6] + p[7] &= q[7] + } +} + +func (f *Filter) union(g *Filter) { + a, b := f.b, g.b + for len(a) >= 2 && len(b) >= 2 { + p := (*block64)(unsafe.Pointer(&a[0])) + q := (*block64)(unsafe.Pointer(&b[0])) + + p[0] |= q[0] + p[1] |= q[1] + p[2] |= q[2] + p[3] |= q[3] + p[4] |= q[4] + p[5] |= q[5] + p[6] |= q[6] + p[7] |= q[7] + + p = (*block64)(unsafe.Pointer(&a[1])) + q = (*block64)(unsafe.Pointer(&b[1])) + + p[0] |= q[0] + p[1] |= q[1] + p[2] |= q[2] + p[3] |= q[3] + p[4] |= q[4] + p[5] |= q[5] + p[6] |= q[6] + p[7] |= q[7] + + a, b = a[2:], b[2:] + } + + if len(a) > 0 && len(b) > 0 { + p := (*block64)(unsafe.Pointer(&a[0])) + q := (*block64)(unsafe.Pointer(&b[0])) + + p[0] |= q[0] + p[1] |= q[1] + p[2] |= q[2] + p[3] |= q[3] + p[4] |= q[4] + p[5] |= q[5] + p[6] |= q[6] + p[7] |= q[7] + } +} + +func onescount(b *block) (n int) { + p := (*block64)(unsafe.Pointer(&b[0])) + + n += bits.OnesCount64(p[0]) + n += bits.OnesCount64(p[1]) + n += bits.OnesCount64(p[2]) + n += bits.OnesCount64(p[3]) + n += bits.OnesCount64(p[4]) + n += bits.OnesCount64(p[5]) + n += bits.OnesCount64(p[6]) + n += bits.OnesCount64(p[7]) + + return n +} + +func onescountAtomic(b *block) (n int) { + p := (*block64)(unsafe.Pointer(&b[0])) + + n += bits.OnesCount64(atomic.LoadUint64(&p[0])) + n += bits.OnesCount64(atomic.LoadUint64(&p[1])) + n += bits.OnesCount64(atomic.LoadUint64(&p[2])) + n += bits.OnesCount64(atomic.LoadUint64(&p[3])) + n += bits.OnesCount64(atomic.LoadUint64(&p[4])) + n += bits.OnesCount64(atomic.LoadUint64(&p[5])) + n += bits.OnesCount64(atomic.LoadUint64(&p[6])) + n += bits.OnesCount64(atomic.LoadUint64(&p[7])) + + return n +} diff --git a/vendor/github.com/greatroar/blobloom/setop_other.go b/vendor/github.com/greatroar/blobloom/setop_other.go new file mode 100644 index 000000000..53749a20b --- /dev/null +++ b/vendor/github.com/greatroar/blobloom/setop_other.go @@ -0,0 +1,115 @@ +// Copyright 2020-2022 the Blobloom authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//go:build (!amd64 && !arm64) || nounsafe +// +build !amd64,!arm64 nounsafe + +package blobloom + +import ( + "math/bits" + "sync/atomic" +) + +func (f *Filter) intersect(g *Filter) { + for i := range f.b { + f.b[i].intersect(&g.b[i]) + } +} + +func (f *Filter) union(g *Filter) { + for i := range f.b { + f.b[i].union(&g.b[i]) + } +} + +func (b *block) intersect(c *block) { + b[0] &= c[0] + b[1] &= c[1] + b[2] &= c[2] + b[3] &= c[3] + b[4] &= c[4] + b[5] &= c[5] + b[6] &= c[6] + b[7] &= c[7] + b[8] &= c[8] + b[9] &= c[9] + b[10] &= c[10] + b[11] &= c[11] + b[12] &= c[12] + b[13] &= c[13] + b[14] &= c[14] + b[15] &= c[15] +} + +func (b *block) union(c *block) { + b[0] |= c[0] + b[1] |= c[1] + b[2] |= c[2] + b[3] |= c[3] + b[4] |= c[4] + b[5] |= c[5] + b[6] |= c[6] + b[7] |= c[7] + b[8] |= c[8] + b[9] |= c[9] + b[10] |= c[10] + b[11] |= c[11] + b[12] |= c[12] + b[13] |= c[13] + b[14] |= c[14] + b[15] |= c[15] +} + +func onescount(b *block) (n int) { + n += bits.OnesCount32(b[0]) + n += bits.OnesCount32(b[1]) + n += bits.OnesCount32(b[2]) + n += bits.OnesCount32(b[3]) + n += bits.OnesCount32(b[4]) + n += bits.OnesCount32(b[5]) + n += bits.OnesCount32(b[6]) + n += bits.OnesCount32(b[7]) + n += bits.OnesCount32(b[8]) + n += bits.OnesCount32(b[9]) + n += bits.OnesCount32(b[10]) + n += bits.OnesCount32(b[11]) + n += bits.OnesCount32(b[12]) + n += bits.OnesCount32(b[13]) + n += bits.OnesCount32(b[14]) + n += bits.OnesCount32(b[15]) + + return n +} + +func onescountAtomic(b *block) (n int) { + n += bits.OnesCount32(atomic.LoadUint32(&b[0])) + n += bits.OnesCount32(atomic.LoadUint32(&b[1])) + n += bits.OnesCount32(atomic.LoadUint32(&b[2])) + n += bits.OnesCount32(atomic.LoadUint32(&b[3])) + n += bits.OnesCount32(atomic.LoadUint32(&b[4])) + n += bits.OnesCount32(atomic.LoadUint32(&b[5])) + n += bits.OnesCount32(atomic.LoadUint32(&b[6])) + n += bits.OnesCount32(atomic.LoadUint32(&b[7])) + n += bits.OnesCount32(atomic.LoadUint32(&b[8])) + n += bits.OnesCount32(atomic.LoadUint32(&b[9])) + n += bits.OnesCount32(atomic.LoadUint32(&b[10])) + n += bits.OnesCount32(atomic.LoadUint32(&b[11])) + n += bits.OnesCount32(atomic.LoadUint32(&b[12])) + n += bits.OnesCount32(atomic.LoadUint32(&b[13])) + n += bits.OnesCount32(atomic.LoadUint32(&b[14])) + n += bits.OnesCount32(atomic.LoadUint32(&b[15])) + + return n +} diff --git a/vendor/github.com/greatroar/blobloom/sync.go b/vendor/github.com/greatroar/blobloom/sync.go new file mode 100644 index 000000000..22503ba4f --- /dev/null +++ b/vendor/github.com/greatroar/blobloom/sync.go @@ -0,0 +1,145 @@ +// Copyright 2021-2022 the Blobloom authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package blobloom + +import "sync/atomic" + +// A SyncFilter is a Bloom filter that can be accessed and updated +// by multiple goroutines concurrently. 
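+//
+// For instance (a sketch; keys, hash and the number of workers are
+// placeholders), several goroutines may insert keys while others call Has,
+// with no additional locking:
+//
+//	f := blobloom.NewSync(1<<26, 7)
+//	for i := 0; i < 4; i++ {
+//		go func() {
+//			for k := range keys {
+//				f.Add(hash(k))
+//			}
+//		}()
+//	}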
+// +// A SyncFilter mostly behaves as a regular filter protected by a lock, +// +// type SyncFilter struct { +// Filter +// lock sync.Mutex +// } +// +// with each method taking and releasing the lock, +// but is implemented much more efficiently. +// See the method descriptions for exceptions to the previous rule. +type SyncFilter struct { + b []block // Shards. + k int // Number of hash functions required. +} + +// NewSync constructs a Bloom filter with given numbers of bits and hash functions. +// +// The number of bits should be at least BlockBits; smaller values are silently +// increased. +// +// The number of hashes reflects the number of hashes synthesized from the +// single hash passed in by the client. It is silently increased to two if +// a lower value is given. +func NewSync(nbits uint64, nhashes int) *SyncFilter { + nbits, nhashes = fixBitsAndHashes(nbits, nhashes) + + return &SyncFilter{ + b: make([]block, nbits/BlockBits), + k: nhashes, + } + +} + +// Add insert a key with hash value h into f. +func (f *SyncFilter) Add(h uint64) { + h1, h2 := uint32(h>>32), uint32(h) + b := getblock(f.b, h2) + + for i := 1; i < f.k; i++ { + h1, h2 = doublehash(h1, h2, i) + setbitAtomic(b, h1) + } +} + +// Cardinality estimates the number of distinct keys added to f. +// +// The estimate is most reliable when f is filled to roughly its capacity. +// It gets worse as f gets more densely filled. When one of the blocks is +// entirely filled, the estimate becomes +Inf. +// +// The return value is the maximum likelihood estimate of Papapetrou, Siberski +// and Nejdl, summed over the blocks +// (https://www.win.tue.nl/~opapapetrou/papers/Bloomfilters-DAPD.pdf). +// +// If other goroutines are concurrently adding keys, +// the estimate may lie in between what would have been returned +// before the concurrent updates started and what is returned +// after the updates complete. +func (f *SyncFilter) Cardinality() float64 { + return cardinality(f.k, f.b, onescountAtomic) +} + +// Empty reports whether f contains no keys. +// +// If other goroutines are concurrently adding keys, +// Empty may return a false positive. +func (f *SyncFilter) Empty() bool { + for i := 0; i < len(f.b); i++ { + for j := 0; j < blockWords; j++ { + if atomic.LoadUint32(&f.b[i][j]) != 0 { + return false + } + } + } + return true +} + +// Fill sets f to a completely full filter. +// After Fill, Has returns true for any key. +func (f *SyncFilter) Fill() { + for i := 0; i < len(f.b); i++ { + for j := 0; j < blockWords; j++ { + atomic.StoreUint32(&f.b[i][j], ^uint32(0)) + } + } +} + +// Has reports whether a key with hash value h has been added. +// It may return a false positive. +func (f *SyncFilter) Has(h uint64) bool { + h1, h2 := uint32(h>>32), uint32(h) + b := getblock(f.b, h2) + + for i := 1; i < f.k; i++ { + h1, h2 = doublehash(h1, h2, i) + if !getbitAtomic(b, h1) { + return false + } + } + return true +} + +// getbitAtomic reports whether bit (i modulo BlockBits) is set. +func getbitAtomic(b *block, i uint32) bool { + bit := uint32(1) << (i % wordSize) + x := atomic.LoadUint32(&(*b)[(i/wordSize)%blockWords]) + return x&bit != 0 +} + +// setbit sets bit (i modulo BlockBits) of b, atomically. +func setbitAtomic(b *block, i uint32) { + bit := uint32(1) << (i % wordSize) + p := &(*b)[(i/wordSize)%blockWords] + + for { + old := atomic.LoadUint32(p) + if old&bit != 0 { + // Checking here instead of checking the return value from + // the CAS is between 50% and 80% faster on the benchmark. 
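+			// The early return is safe: no concurrent operation clears
+			// bits, so once the bit is observed set, the work is done.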
+ return + } + atomic.CompareAndSwapUint32(p, old, old|bit) + } +} diff --git a/vendor/github.com/greatroar/blobloom/test.sh b/vendor/github.com/greatroar/blobloom/test.sh new file mode 100644 index 000000000..bf90f54c5 --- /dev/null +++ b/vendor/github.com/greatroar/blobloom/test.sh @@ -0,0 +1,16 @@ +#!/bin/sh + +set -e -x + +golangci-lint run . examples/* + +go test + +if [ "$(go env GOARCH)" = amd64 ]; then + go test -tags nounsafe + GOARCH=386 go test +fi + +for e in examples/*; do + (cd $e && go build && rm $(basename $e)) +done diff --git a/vendor/github.com/seiflotfy/cuckoofilter/.gitignore b/vendor/github.com/seiflotfy/cuckoofilter/.gitignore deleted file mode 100644 index 11b90db8d..000000000 --- a/vendor/github.com/seiflotfy/cuckoofilter/.gitignore +++ /dev/null @@ -1,26 +0,0 @@ -# Compiled Object files, Static and Dynamic libs (Shared Objects) -*.o -*.a -*.so - -# Folders -_obj -_test - -# Architecture specific extensions/prefixes -*.[568vq] -[568vq].out - -*.cgo1.go -*.cgo2.c -_cgo_defun.c -_cgo_gotypes.go -_cgo_export.* - -_testmain.go - -*.exe -*.test -*.prof - -.idea diff --git a/vendor/github.com/seiflotfy/cuckoofilter/LICENSE b/vendor/github.com/seiflotfy/cuckoofilter/LICENSE deleted file mode 100644 index 58393c98c..000000000 --- a/vendor/github.com/seiflotfy/cuckoofilter/LICENSE +++ /dev/null @@ -1,22 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2015 Seif Lotfy - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - diff --git a/vendor/github.com/seiflotfy/cuckoofilter/README.md b/vendor/github.com/seiflotfy/cuckoofilter/README.md deleted file mode 100644 index 2a77fb393..000000000 --- a/vendor/github.com/seiflotfy/cuckoofilter/README.md +++ /dev/null @@ -1,62 +0,0 @@ -# Cuckoo Filter - -[![GoDoc](https://godoc.org/github.com/seiflotfy/cuckoofilter?status.svg)](https://godoc.org/github.com/seiflotfy/cuckoofilter) [![CodeHunt.io](https://img.shields.io/badge/vote-codehunt.io-02AFD1.svg)](http://codehunt.io/sub/cuckoo-filter/?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) - -Cuckoo filter is a Bloom filter replacement for approximated set-membership queries. While Bloom filters are well-known space-efficient data structures to serve queries like "if item x is in a set?", they do not support deletion. Their variances to enable deletion (like counting Bloom filters) usually require much more space. - -Cuckoo filters provide the flexibility to add and remove items dynamically. A cuckoo filter is based on cuckoo hashing (and therefore named as cuckoo filter). 
It is essentially a cuckoo hash table storing each key's fingerprint. Cuckoo hash tables can be highly compact, thus a cuckoo filter could use less space than conventional Bloom filters, for applications that require low false positive rates (< 3%). - -For details about the algorithm and citations please use this article for now - -["Cuckoo Filter: Better Than Bloom" by Bin Fan, Dave Andersen and Michael Kaminsky](https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf) - -## Implementation details - -The paper cited above leaves several parameters to choose. In this implementation - -1. Every element has 2 possible bucket indices -2. Buckets have a static size of 4 fingerprints -3. Fingerprints have a static size of 8 bits - -1 and 2 are suggested to be the optimum by the authors. The choice of 3 comes down to the desired false positive rate. Given a target false positive rate of `r` and a bucket size `b`, they suggest choosing the fingerprint size `f` using - - f >= log2(2b/r) bits - -With the 8 bit fingerprint size in this repository, you can expect `r ~= 0.03`. -[Other implementations](https://github.com/panmari/cuckoofilter) use 16 bit, which correspond to a false positive rate of `r ~= 0.0001`. - -## Example usage: -```go -package main - -import "fmt" -import cuckoo "github.com/seiflotfy/cuckoofilter" - -func main() { - cf := cuckoo.NewFilter(1000) - cf.InsertUnique([]byte("geeky ogre")) - - // Lookup a string (and it a miss) if it exists in the cuckoofilter - cf.Lookup([]byte("hello")) - - count := cf.Count() - fmt.Println(count) // count == 1 - - // Delete a string (and it a miss) - cf.Delete([]byte("hello")) - - count = cf.Count() - fmt.Println(count) // count == 1 - - // Delete a string (a hit) - cf.Delete([]byte("geeky ogre")) - - count = cf.Count() - fmt.Println(count) // count == 0 - - cf.Reset() // reset -} -``` - -## Documentation: -["Cuckoo Filter on GoDoc"](http://godoc.org/github.com/seiflotfy/cuckoofilter) diff --git a/vendor/github.com/seiflotfy/cuckoofilter/bucket.go b/vendor/github.com/seiflotfy/cuckoofilter/bucket.go deleted file mode 100644 index 4a83fc503..000000000 --- a/vendor/github.com/seiflotfy/cuckoofilter/bucket.go +++ /dev/null @@ -1,45 +0,0 @@ -package cuckoo - -type fingerprint byte - -type bucket [bucketSize]fingerprint - -const ( - nullFp = 0 - bucketSize = 4 -) - -func (b *bucket) insert(fp fingerprint) bool { - for i, tfp := range b { - if tfp == nullFp { - b[i] = fp - return true - } - } - return false -} - -func (b *bucket) delete(fp fingerprint) bool { - for i, tfp := range b { - if tfp == fp { - b[i] = nullFp - return true - } - } - return false -} - -func (b *bucket) getFingerprintIndex(fp fingerprint) int { - for i, tfp := range b { - if tfp == fp { - return i - } - } - return -1 -} - -func (b *bucket) reset() { - for i := range b { - b[i] = nullFp - } -} diff --git a/vendor/github.com/seiflotfy/cuckoofilter/cuckoofilter.go b/vendor/github.com/seiflotfy/cuckoofilter/cuckoofilter.go deleted file mode 100644 index ec0d246de..000000000 --- a/vendor/github.com/seiflotfy/cuckoofilter/cuckoofilter.go +++ /dev/null @@ -1,165 +0,0 @@ -package cuckoo - -import ( - "fmt" - "math/bits" - "math/rand" -) - -const maxCuckooCount = 500 - -// Filter is a probabilistic counter -type Filter struct { - buckets []bucket - count uint - bucketPow uint -} - -// NewFilter returns a new cuckoofilter with a given capacity. -// A capacity of 1000000 is a normal default, which allocates -// about ~1MB on 64-bit machines. 
-func NewFilter(capacity uint) *Filter { - capacity = getNextPow2(uint64(capacity)) / bucketSize - if capacity == 0 { - capacity = 1 - } - buckets := make([]bucket, capacity) - return &Filter{ - buckets: buckets, - count: 0, - bucketPow: uint(bits.TrailingZeros(capacity)), - } -} - -// Lookup returns true if data is in the counter -func (cf *Filter) Lookup(data []byte) bool { - i1, fp := getIndexAndFingerprint(data, cf.bucketPow) - if cf.buckets[i1].getFingerprintIndex(fp) > -1 { - return true - } - i2 := getAltIndex(fp, i1, cf.bucketPow) - return cf.buckets[i2].getFingerprintIndex(fp) > -1 -} - -// Reset ... -func (cf *Filter) Reset() { - for i := range cf.buckets { - cf.buckets[i].reset() - } - cf.count = 0 -} - -func randi(i1, i2 uint) uint { - if rand.Intn(2) == 0 { - return i1 - } - return i2 -} - -// Insert inserts data into the counter and returns true upon success -func (cf *Filter) Insert(data []byte) bool { - i1, fp := getIndexAndFingerprint(data, cf.bucketPow) - if cf.insert(fp, i1) { - return true - } - i2 := getAltIndex(fp, i1, cf.bucketPow) - if cf.insert(fp, i2) { - return true - } - return cf.reinsert(fp, randi(i1, i2)) -} - -// InsertUnique inserts data into the counter if not exists and returns true upon success -func (cf *Filter) InsertUnique(data []byte) bool { - if cf.Lookup(data) { - return false - } - return cf.Insert(data) -} - -func (cf *Filter) insert(fp fingerprint, i uint) bool { - if cf.buckets[i].insert(fp) { - cf.count++ - return true - } - return false -} - -func (cf *Filter) reinsert(fp fingerprint, i uint) bool { - for k := 0; k < maxCuckooCount; k++ { - j := rand.Intn(bucketSize) - oldfp := fp - fp = cf.buckets[i][j] - cf.buckets[i][j] = oldfp - - // look in the alternate location for that random element - i = getAltIndex(fp, i, cf.bucketPow) - if cf.insert(fp, i) { - return true - } - } - return false -} - -// Delete data from counter if exists and return if deleted or not -func (cf *Filter) Delete(data []byte) bool { - i1, fp := getIndexAndFingerprint(data, cf.bucketPow) - if cf.delete(fp, i1) { - return true - } - i2 := getAltIndex(fp, i1, cf.bucketPow) - return cf.delete(fp, i2) -} - -func (cf *Filter) delete(fp fingerprint, i uint) bool { - if cf.buckets[i].delete(fp) { - if cf.count > 0 { - cf.count-- - } - return true - } - return false -} - -// Count returns the number of items in the counter -func (cf *Filter) Count() uint { - return cf.count -} - -// Encode returns a byte slice representing a Cuckoofilter -func (cf *Filter) Encode() []byte { - bytes := make([]byte, len(cf.buckets)*bucketSize) - for i, b := range cf.buckets { - for j, f := range b { - index := (i * len(b)) + j - bytes[index] = byte(f) - } - } - return bytes -} - -// Decode returns a Cuckoofilter from a byte slice -func Decode(bytes []byte) (*Filter, error) { - var count uint - if len(bytes)%bucketSize != 0 { - return nil, fmt.Errorf("expected bytes to be multiple of %d, got %d", bucketSize, len(bytes)) - } - if len(bytes) == 0 { - return nil, fmt.Errorf("bytes can not be empty") - } - buckets := make([]bucket, len(bytes)/4) - for i, b := range buckets { - for j := range b { - index := (i * len(b)) + j - if bytes[index] != 0 { - buckets[i][j] = fingerprint(bytes[index]) - count++ - } - } - } - return &Filter{ - buckets: buckets, - count: count, - bucketPow: uint(bits.TrailingZeros(uint(len(buckets)))), - }, nil -} diff --git a/vendor/github.com/seiflotfy/cuckoofilter/doc.go b/vendor/github.com/seiflotfy/cuckoofilter/doc.go deleted file mode 100644 index 6f6cbf828..000000000 --- 
a/vendor/github.com/seiflotfy/cuckoofilter/doc.go +++ /dev/null @@ -1,35 +0,0 @@ -/* -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -*/ - -/* -Package cuckoo provides a Cuckoo Filter, a Bloom filter replacement for approximated set-membership queries. - -While Bloom filters are well-known space-efficient data structures to serve queries like "if item x is in a set?", they do not support deletion. Their variances to enable deletion (like counting Bloom filters) usually require much more space. - -Cuckoo filters provide the flexibility to add and remove items dynamically. A cuckoo filter is based on cuckoo hashing (and therefore named as cuckoo filter). It is essentially a cuckoo hash table storing each key's fingerprint. Cuckoo hash tables can be highly compact, thus a cuckoo filter could use less space than conventional Bloom filters, for applications that require low false positive rates (< 3%). 
- -For details about the algorithm and citations please use this article: - -"Cuckoo Filter: Better Than Bloom" by Bin Fan, Dave Andersen and Michael Kaminsky -(https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf) - -Note: -This implementation uses a a static bucket size of 4 fingerprints and a fingerprint size of 1 byte based on my understanding of an optimal bucket/fingerprint/size ratio from the aforementioned paper.*/ -package cuckoo diff --git a/vendor/github.com/seiflotfy/cuckoofilter/scalable_cuckoofilter.go b/vendor/github.com/seiflotfy/cuckoofilter/scalable_cuckoofilter.go deleted file mode 100644 index 693184c9d..000000000 --- a/vendor/github.com/seiflotfy/cuckoofilter/scalable_cuckoofilter.go +++ /dev/null @@ -1,170 +0,0 @@ -package cuckoo - -import ( - "bytes" - "encoding/gob" -) - -const ( - DefaultLoadFactor = 0.9 - DefaultCapacity = 10000 -) - -type ScalableCuckooFilter struct { - filters []*Filter - loadFactor float32 - //when scale(last filter size * loadFactor >= capacity) get new filter capacity - scaleFactor func(capacity uint) uint -} - -type option func(*ScalableCuckooFilter) - -type Store struct { - Bytes [][]byte - LoadFactor float32 -} - -/* - by default option the grow capacity is: - capacity , total - 4096 4096 - 8192 12288 -16384 28672 -32768 61440 -65536 126,976 -*/ -func NewScalableCuckooFilter(opts ...option) *ScalableCuckooFilter { - sfilter := new(ScalableCuckooFilter) - for _, opt := range opts { - opt(sfilter) - } - configure(sfilter) - return sfilter -} - -func (sf *ScalableCuckooFilter) Lookup(data []byte) bool { - for _, filter := range sf.filters { - if filter.Lookup(data) { - return true - } - } - return false -} - -func (sf *ScalableCuckooFilter) Reset() { - for _, filter := range sf.filters { - filter.Reset() - } -} - -func (sf *ScalableCuckooFilter) Insert(data []byte) bool { - needScale := false - lastFilter := sf.filters[len(sf.filters)-1] - if (float32(lastFilter.count) / float32(len(lastFilter.buckets))) > sf.loadFactor { - needScale = true - } else { - b := lastFilter.Insert(data) - needScale = !b - } - if !needScale { - return true - } - newFilter := NewFilter(sf.scaleFactor(uint(len(lastFilter.buckets)))) - sf.filters = append(sf.filters, newFilter) - return newFilter.Insert(data) -} - -func (sf *ScalableCuckooFilter) InsertUnique(data []byte) bool { - if sf.Lookup(data) { - return false - } - return sf.Insert(data) -} - -func (sf *ScalableCuckooFilter) Delete(data []byte) bool { - for _, filter := range sf.filters { - if filter.Delete(data) { - return true - } - } - return false -} - -func (sf *ScalableCuckooFilter) Count() uint { - var sum uint - for _, filter := range sf.filters { - sum += filter.count - } - return sum - -} - -func (sf *ScalableCuckooFilter) Encode() []byte { - slice := make([][]byte, len(sf.filters)) - for i, filter := range sf.filters { - encode := filter.Encode() - slice[i] = encode - } - store := &Store{ - Bytes: slice, - LoadFactor: sf.loadFactor, - } - buf := bytes.NewBuffer(nil) - enc := gob.NewEncoder(buf) - err := enc.Encode(store) - if err != nil { - return nil - } - return buf.Bytes() -} - -func (sf *ScalableCuckooFilter) DecodeWithParam(fBytes []byte, opts ...option) (*ScalableCuckooFilter, error) { - instance, err := DecodeScalableFilter(fBytes) - if err != nil { - return nil, err - } - for _, opt := range opts { - opt(instance) - } - return instance, nil -} - -func DecodeScalableFilter(fBytes []byte) (*ScalableCuckooFilter, error) { - buf := bytes.NewBuffer(fBytes) - dec := gob.NewDecoder(buf) - store 
:= &Store{} - err := dec.Decode(store) - if err != nil { - return nil, err - } - filterSize := len(store.Bytes) - instance := NewScalableCuckooFilter(func(filter *ScalableCuckooFilter) { - filter.filters = make([]*Filter, filterSize) - }, func(filter *ScalableCuckooFilter) { - filter.loadFactor = store.LoadFactor - }) - for i, oneBytes := range store.Bytes { - filter, err := Decode(oneBytes) - if err != nil { - return nil, err - } - instance.filters[i] = filter - } - return instance, nil - -} - -func configure(sfilter *ScalableCuckooFilter) { - if sfilter.loadFactor == 0 { - sfilter.loadFactor = DefaultLoadFactor - } - if sfilter.scaleFactor == nil { - sfilter.scaleFactor = func(currentSize uint) uint { - return currentSize * bucketSize * 2 - } - } - if sfilter.filters == nil { - initFilter := NewFilter(DefaultCapacity) - sfilter.filters = []*Filter{initFilter} - } -} diff --git a/vendor/github.com/seiflotfy/cuckoofilter/util.go b/vendor/github.com/seiflotfy/cuckoofilter/util.go deleted file mode 100644 index 2a0f65b13..000000000 --- a/vendor/github.com/seiflotfy/cuckoofilter/util.go +++ /dev/null @@ -1,52 +0,0 @@ -package cuckoo - -import ( - metro "github.com/dgryski/go-metro" -) - -var ( - altHash = [256]uint{} - masks = [65]uint{} -) - -func init() { - for i := 0; i < 256; i++ { - altHash[i] = (uint(metro.Hash64([]byte{byte(i)}, 1337))) - } - for i := uint(0); i <= 64; i++ { - masks[i] = (1 << i) - 1 - } -} - -func getAltIndex(fp fingerprint, i uint, bucketPow uint) uint { - mask := masks[bucketPow] - hash := altHash[fp] & mask - return (i & mask) ^ hash -} - -func getFingerprint(hash uint64) byte { - // Use least significant bits for fingerprint. - fp := byte(hash%255 + 1) - return fp -} - -// getIndicesAndFingerprint returns the 2 bucket indices and fingerprint to be used -func getIndexAndFingerprint(data []byte, bucketPow uint) (uint, fingerprint) { - hash := metro.Hash64(data, 1337) - fp := getFingerprint(hash) - // Use most significant bits for deriving index. - i1 := uint(hash>>32) & masks[bucketPow] - return i1, fingerprint(fp) -} - -func getNextPow2(n uint64) uint { - n-- - n |= n >> 1 - n |= n >> 2 - n |= n >> 4 - n |= n >> 8 - n |= n >> 16 - n |= n >> 32 - n++ - return uint(n) -} diff --git a/vendor/modules.txt b/vendor/modules.txt index e0a201d24..c02ffef02 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -64,9 +64,6 @@ github.com/davecgh/go-spew/spew # github.com/dgryski/go-expirecache v0.0.0-20170314133854-743ef98b2adb ## explicit github.com/dgryski/go-expirecache -# github.com/dgryski/go-metro v0.0.0-20211217172704-adc40b04c140 -## explicit -github.com/dgryski/go-metro # github.com/dgryski/go-trigram v0.0.0-20160407183937-79ec494e1ad0 ## explicit github.com/dgryski/go-trigram @@ -158,6 +155,9 @@ github.com/googleapis/gax-go/v2/apierror github.com/googleapis/gax-go/v2/apierror/internal/proto github.com/googleapis/gax-go/v2/callctx github.com/googleapis/gax-go/v2/internal +# github.com/greatroar/blobloom v0.8.0 +## explicit; go 1.14 +github.com/greatroar/blobloom # github.com/hashicorp/errwrap v1.1.0 ## explicit github.com/hashicorp/errwrap @@ -283,9 +283,6 @@ github.com/prometheus/procfs/internal/util # github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475 ## explicit github.com/rcrowley/go-metrics -# github.com/seiflotfy/cuckoofilter v0.0.0-20220411075957-e3b120b3f5fb -## explicit; go 1.15 -github.com/seiflotfy/cuckoofilter # github.com/sevlyar/go-daemon v0.1.6 ## explicit github.com/sevlyar/go-daemon