Skip to content

Commit

Permalink
Merge pull request #317 from StephenButtolph/sampling
Browse files Browse the repository at this point in the history
added new sampling algos and optimized initializations
  • Loading branch information
StephenButtolph authored Aug 23, 2020
2 parents 170716e + c6e95f0 commit 053a094
Show file tree
Hide file tree
Showing 14 changed files with 285 additions and 35 deletions.
8 changes: 8 additions & 0 deletions snow/validators/set.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,14 @@ func NewSet() Set {
}
}

// NewBestSet returns a new, empty set of validators. The set's sampler is
// obtained from sampler.NewBestWeightedWithoutReplacement, which is handed
// expectedSampleSize.
func NewBestSet(expectedSampleSize int) Set {
	indices := make(map[[20]byte]int)
	weighted := sampler.NewBestWeightedWithoutReplacement(expectedSampleSize)

	return &set{
		vdrMap:  indices,
		sampler: weighted,
	}
}

// set of validators. Validator function results are cached. Therefore, to
// update a validator's weight, make sure to call add with the updated
// validator.
Expand Down
3 changes: 3 additions & 0 deletions utils/sampler/uniform.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,6 @@ type Uniform interface {
Initialize(sampleRange uint64) error
Sample(length int) ([]uint64, error)
}

// NewUniform returns a new Uniform sampler. The returned value is a freshly
// allocated uniformReplacer; Initialize must be called before sampling.
func NewUniform() Uniform {
	return new(uniformReplacer)
}
15 changes: 7 additions & 8 deletions utils/sampler/uniform_benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,17 @@ import (

// BenchmarkAllUniform
func BenchmarkAllUniform(b *testing.B) {
sizes := []int{
1,
5,
25,
50,
75,
100,
sizes := []uint64{
30,
35,
500,
10000,
100000,
}
for _, s := range uniformSamplers {
for _, size := range sizes {
b.Run(fmt.Sprintf("sampler %s with %d elements uniformly", s.name, size), func(b *testing.B) {
UniformBenchmark(b, s.sampler, 1000000, size)
UniformBenchmark(b, s.sampler, size, 30)
})
}
}
Expand Down
81 changes: 81 additions & 0 deletions utils/sampler/uniform_best.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// (c) 2019-2020, Ava Labs, Inc. All rights reserved.
// See the file LICENSE for licensing terms.

package sampler

import (
"errors"
"math"
"math/rand"
"time"

"github.com/ava-labs/gecko/utils/timer"
)

var (
errNoValidUniformSamplers = errors.New("no valid uniform samplers found")
)

// init seeds the global math/rand source from the current wall-clock time
// when the package is loaded.
func init() {
	seed := time.Now().UnixNano()
	rand.Seed(seed)
}

// uniformBest implements the Uniform interface.
//
// Sampling is performed by using another implementation of the Uniform
// interface: the embedded Uniform, which Initialize points at whichever
// candidate finished the benchmark fastest.
//
// Initialization attempts to find the best sampling algorithm given the dataset
// by performing a benchmark of the provided implementations.
type uniformBest struct {
// Uniform is the sampler selected by the most recent Initialize call. It is
// reset to nil at the start of Initialize and stays nil if every candidate
// fails.
Uniform
// samplers are the candidate implementations that Initialize benchmarks, in
// order.
samplers []Uniform
// maxSampleSize is the sample size requested from each candidate during the
// benchmark; Initialize lowers it to the range length when the range is
// smaller.
maxSampleSize int
// benchmarkIterations is the number of Sample calls timed per candidate.
benchmarkIterations int
// clock supplies the start and end timestamps used to time each candidate.
clock timer.Clock
}

// NewBestUniform returns a Uniform sampler that picks between a
// uniformReplacer and a uniformResample each time it is initialized.
//
// expectedSampleSize is stored as the sample size used while benchmarking the
// candidates; each candidate is timed over 100 iterations.
func NewBestUniform(expectedSampleSize int) Uniform {
	candidates := []Uniform{
		&uniformReplacer{},
		&uniformResample{},
	}

	best := &uniformBest{
		benchmarkIterations: 100,
		maxSampleSize:       expectedSampleSize,
		samplers:            candidates,
	}
	return best
}

// Initialize benchmarks every candidate sampler over the range [0, length)
// and adopts the fastest one as the embedded Uniform.
//
// A candidate is discarded if its own Initialize fails or if any of its
// benchmark Sample calls fails. Only the Sample calls are timed; the
// candidate's Initialize cost is not part of the measurement.
//
// Returns errNoValidUniformSamplers when no candidate completes the benchmark,
// otherwise nil.
func (s *uniformBest) Initialize(length uint64) error {
	s.Uniform = nil

	// Never ask a candidate for more elements than the range contains.
	benchSize := s.maxSampleSize
	if uint64(benchSize) > length {
		benchSize = int(length)
	}

	// benchmark reports how long benchmarkIterations draws took for the given
	// candidate, or false if the candidate had to be discarded.
	benchmark := func(candidate Uniform) (time.Duration, bool) {
		if err := candidate.Initialize(length); err != nil {
			return 0, false
		}

		began := s.clock.Time()
		for remaining := s.benchmarkIterations; remaining > 0; remaining-- {
			if _, err := candidate.Sample(benchSize); err != nil {
				return 0, false
			}
		}
		return s.clock.Time().Sub(began), true
	}

	fastest := time.Duration(math.MaxInt64)
	for _, candidate := range s.samplers {
		elapsed, ok := benchmark(candidate)
		if ok && elapsed < fastest {
			fastest, s.Uniform = elapsed, candidate
		}
	}

	if s.Uniform != nil {
		return nil
	}
	return errNoValidUniformSamplers
}
3 changes: 0 additions & 3 deletions utils/sampler/uniform_replacer.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,6 @@ type uniformReplacer struct {
length uint64
}

// NewUniform returns a new sampler
func NewUniform() Uniform { return &uniformReplacer{} }

func (s *uniformReplacer) Initialize(length uint64) error {
if length > math.MaxInt64 {
return errOutOfRange
Expand Down
54 changes: 54 additions & 0 deletions utils/sampler/uniform_resample.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// (c) 2019-2020, Ava Labs, Inc. All rights reserved.
// See the file LICENSE for licensing terms.

package sampler

import (
"math"
"math/rand"
"time"
)

// init seeds the global math/rand source, which Sample draws from, using the
// current wall-clock time in nanoseconds.
func init() {
	now := time.Now()
	rand.Seed(now.UnixNano())
}

// uniformResample allows for sampling over a uniform distribution without
// replacement.
//
// Sampling is performed by sampling with replacement and resampling if a
// duplicate is sampled.
//
// Initialization takes O(1) time.
//
// Sampling uses O(count) space. Its running time is O(count) in expectation
// only while duplicates are rare, i.e. when count is small relative to the
// range; every duplicate costs an additional draw, so the number of draws
// grows as count approaches the size of the range.
type uniformResample struct {
// length is the size of the range being sampled; Sample draws values in
// [0, length). It is set by Initialize.
length uint64
}

// Initialize records length as the size of the range to sample from.
//
// Returns errOutOfRange, leaving the sampler untouched, when length exceeds
// math.MaxInt64, because Sample converts the length to an int64 before
// drawing.
func (s *uniformResample) Initialize(length uint64) error {
	if length <= math.MaxInt64 {
		s.length = length
		return nil
	}
	return errOutOfRange
}

// Sample returns count distinct values drawn uniformly from [0, length), in
// the order they were drawn.
//
// Returns errOutOfRange when count is negative or larger than the configured
// length.
func (s *uniformResample) Sample(count int) ([]uint64, error) {
	if count < 0 || uint64(count) > s.length {
		return nil, errOutOfRange
	}

	seen := make(map[uint64]struct{}, count)
	picks := make([]uint64, count)

	// filled only advances when a previously unseen value is drawn, so a
	// duplicate simply triggers another draw for the same slot.
	for filled := 0; filled < count; {
		candidate := uint64(rand.Int63n(int64(s.length)))
		if _, duplicate := seen[candidate]; duplicate {
			continue
		}

		seen[candidate] = struct{}{}
		picks[filled] = candidate
		filled++
	}

	return picks, nil
}
8 changes: 8 additions & 0 deletions utils/sampler/uniform_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,14 @@ var (
name: "replacer",
sampler: &uniformReplacer{},
},
{
name: "resampler",
sampler: &uniformResample{},
},
{
name: "best",
sampler: NewBestUniform(30),
},
}
uniformTests = []struct {
name string
Expand Down
14 changes: 14 additions & 0 deletions utils/sampler/weighted.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,17 @@ type Weighted interface {
Initialize(weights []uint64) error
Sample(sampleValue uint64) (int, error)
}

// NewWeighted returns a new Weighted sampler.
//
// The result is a weightedBest configured with three candidate
// implementations (weightedArray, weightedHeap, and a weightedUniform whose
// maxWeight is 1 << 10) and 100 benchmark iterations.
func NewWeighted() Weighted {
	candidates := []Weighted{
		&weightedArray{},
		&weightedHeap{},
		&weightedUniform{maxWeight: 1 << 10},
	}

	best := &weightedBest{
		benchmarkIterations: 100,
		samplers:            candidates,
	}
	return best
}
78 changes: 70 additions & 8 deletions utils/sampler/weighted_benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ import (
safemath "github.com/ava-labs/gecko/utils/math"
)

// BenchmarkAllWeighted
func BenchmarkAllWeighted(b *testing.B) {
// BenchmarkAllWeightedSampling
func BenchmarkAllWeightedSampling(b *testing.B) {
pows := []float64{
0,
1,
Expand All @@ -31,17 +31,52 @@ func BenchmarkAllWeighted(b *testing.B) {
for _, s := range weightedSamplers {
for _, pow := range pows {
for _, size := range sizes {
if WeightedPowBenchmark(b, s.sampler, pow, size) {
if WeightedPowBenchmarkSampler(b, s.sampler, pow, size) {
b.Run(fmt.Sprintf("sampler %s with %d elements at x^%.1f", s.name, size, pow), func(b *testing.B) {
WeightedPowBenchmark(b, s.sampler, pow, size)
WeightedPowBenchmarkSampler(b, s.sampler, pow, size)
})
}
}
}
for _, size := range sizes {
if WeightedSingletonBenchmark(b, s.sampler, size) {
if WeightedSingletonBenchmarkSampler(b, s.sampler, size) {
b.Run(fmt.Sprintf("sampler %s with %d singleton elements", s.name, size), func(b *testing.B) {
WeightedSingletonBenchmark(b, s.sampler, size)
WeightedSingletonBenchmarkSampler(b, s.sampler, size)
})
}
}
}
}

// BenchmarkAllWeightedInitializer
func BenchmarkAllWeightedInitializer(b *testing.B) {
pows := []float64{
0,
1,
2,
3,
}
sizes := []int{
10,
500,
1000,
50000,
100000,
}
for _, s := range weightedSamplers {
for _, pow := range pows {
for _, size := range sizes {
if WeightedPowBenchmarkSampler(b, s.sampler, pow, size) {
b.Run(fmt.Sprintf("initializer %s with %d elements at x^%.1f", s.name, size, pow), func(b *testing.B) {
WeightedPowBenchmarkInitializer(b, s.sampler, pow, size)
})
}
}
}
for _, size := range sizes {
if WeightedSingletonBenchmarkSampler(b, s.sampler, size) {
b.Run(fmt.Sprintf("initializer %s with %d singleton elements", s.name, size), func(b *testing.B) {
WeightedSingletonBenchmarkInitializer(b, s.sampler, size)
})
}
}
Expand All @@ -67,7 +102,7 @@ func CalcWeightedPoW(exponent float64, size int) (uint64, []uint64, error) {
return totalWeight, weights, nil
}

func WeightedPowBenchmark(
func WeightedPowBenchmarkSampler(
b *testing.B,
s Weighted,
exponent float64,
Expand All @@ -88,7 +123,7 @@ func WeightedPowBenchmark(
return true
}

func WeightedSingletonBenchmark(b *testing.B, s Weighted, size int) bool {
func WeightedSingletonBenchmarkSampler(b *testing.B, s Weighted, size int) bool {
weights := make([]uint64, size)
weights[0] = uint64(math.MaxInt64 - size + 1)
for i := 1; i < len(weights); i++ {
Expand All @@ -106,3 +141,30 @@ func WeightedSingletonBenchmark(b *testing.B, s Weighted, size int) bool {
}
return true
}

// WeightedPowBenchmarkInitializer measures how long s takes to initialize
// with the weights produced by CalcWeightedPoW(exponent, size).
//
// The weights are computed before the timer is reset, so only the b.N calls
// to s.Initialize are timed. Errors from CalcWeightedPoW and from Initialize
// are deliberately ignored.
func WeightedPowBenchmarkInitializer(
	b *testing.B,
	s Weighted,
	exponent float64,
	size int,
) {
	_, weights, _ := CalcWeightedPoW(exponent, size)

	b.ResetTimer()
	for remaining := b.N; remaining > 0; remaining-- {
		_ = s.Initialize(weights)
	}
}

// WeightedSingletonBenchmarkInitializer measures how long s takes to
// initialize with a heavily skewed distribution: every element has weight 1
// except the first, which receives math.MaxInt64 - size + 1 so that the
// weights sum to math.MaxInt64.
//
// size must be at least 1. Only the b.N calls to s.Initialize are timed, and
// their errors are deliberately ignored.
func WeightedSingletonBenchmarkInitializer(b *testing.B, s Weighted, size int) {
	weights := make([]uint64, size)
	for idx := range weights {
		weights[idx] = 1
	}
	// Overwrite the first entry with the single dominant weight.
	weights[0] = uint64(math.MaxInt64 - size + 1)

	b.ResetTimer()
	for remaining := b.N; remaining > 0; remaining-- {
		_ = s.Initialize(weights)
	}
}
13 changes: 8 additions & 5 deletions utils/sampler/weighted_best.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import (
)

var (
errNoValidSamplers = errors.New("no valid samplers found")
errNoValidWeightedSamplers = errors.New("no valid weighted samplers found")
)

// init seeds the global math/rand source from the current wall-clock time so
// that benchmark sample values differ between runs.
func init() {
	nanos := time.Now().UnixNano()
	rand.Seed(nanos)
}
Expand Down Expand Up @@ -48,9 +48,12 @@ func (s *weightedBest) Initialize(weights []uint64) error {
return errWeightsTooLarge
}

samples := make([]uint64, s.benchmarkIterations)
for i := range samples {
samples[i] = uint64(rand.Int63n(int64(totalWeight)))
samples := []uint64(nil)
if totalWeight > 0 {
samples = make([]uint64, s.benchmarkIterations)
for i := range samples {
samples[i] = uint64(rand.Int63n(int64(totalWeight)))
}
}

s.Weighted = nil
Expand All @@ -75,7 +78,7 @@ func (s *weightedBest) Initialize(weights []uint64) error {
}

if s.Weighted == nil {
return errNoValidSamplers
return errNoValidWeightedSamplers
}
return nil
}
4 changes: 3 additions & 1 deletion utils/sampler/weighted_heap.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@ func (s *weightedHeap) Initialize(weights []uint64) error {

// Initialize the heap
for i := len(s.heap) - 1; i > 0; i-- {
parentIndex := (i - 1) / 2
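// i is always positive here, so (i - 1) is non-negative and a shift gives
// the same result as dividing by 2. Using an explicit shift lets the
// compiler skip the negative-number handling that signed division
// requires, which saves a couple of cycles.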
parentIndex := (i - 1) >> 1
newWeight, err := safemath.Add64(
s.heap[parentIndex].cumulativeWeight,
s.heap[i].cumulativeWeight,
Expand Down
Loading

0 comments on commit 053a094

Please sign in to comment.