From 7da7d644a6769cc73dcc01d50f41a7fc696f5f9f Mon Sep 17 00:00:00 2001 From: Andrey Pechkurov Date: Sat, 13 Jul 2024 11:27:47 +0300 Subject: [PATCH] Fix Map/MapOf capacity calculation for WithPresize --- README.md | 13 +++++++++++++ map.go | 2 +- map_test.go | 6 +++--- mapof.go | 8 ++++---- mapof_test.go | 14 +++++++------- 5 files changed, 28 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index c468794..81be1d6 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,19 @@ m.Store(Point{42, 42}, 42) v, ok := m.Load(point{42, 42}) ``` +Both maps use the built-in Golang hash function, which has DDOS protection. This means that each map instance gets its own seed number and the hash function uses that seed for hash code calculation. However, for smaller keys this hash function has some overhead. So, if you don't need DDOS protection, you may provide a custom hash function when creating a `MapOf`. For instance, the Murmur3 finalizer does a decent job when it comes to integers: + +```go +m := NewMapOfWithHasher[int, int](func(i int, _ uint64) uint64 { + h := uint64(i) + h = (h ^ (h >> 33)) * 0xff51afd7ed558ccd + h = (h ^ (h >> 33)) * 0xc4ceb9fe1a85ec53 + return h ^ (h >> 33) +}) +``` + +When benchmarking concurrent maps, make sure to configure all of the competitors with the same hash function or, at least, take hash function performance into consideration. + ### MPMCQueue A `MPMCQueue` is a bounded multi-producer multi-consumer concurrent queue. 
diff --git a/map.go b/map.go index 8e98c1e..6c5b6eb 100644 --- a/map.go +++ b/map.go @@ -163,7 +163,7 @@ func NewMap(options ...func(*MapConfig)) *Map { if c.sizeHint <= defaultMinMapTableLen*entriesPerMapBucket { table = newMapTable(defaultMinMapTableLen) } else { - tableLen := nextPowOf2(uint32(c.sizeHint / entriesPerMapBucket)) + tableLen := nextPowOf2(uint32((float64(c.sizeHint) / entriesPerMapBucket) / mapLoadFactor)) table = newMapTable(int(tableLen)) } m.minTableLen = len(table.buckets) diff --git a/map_test.go b/map_test.go index a371c75..74a3885 100644 --- a/map_test.go +++ b/map_test.go @@ -584,9 +584,9 @@ func TestNewMapPresized(t *testing.T) { func TestNewMapPresized_DoesNotShrinkBelowMinTableLen(t *testing.T) { const minTableLen = 1024 - const numEntries = minTableLen * EntriesPerMapBucket + const numEntries = int(minTableLen * EntriesPerMapBucket * MapLoadFactor) m := NewMap(WithPresize(numEntries)) - for i := 0; i < numEntries; i++ { + for i := 0; i < 2*numEntries; i++ { m.Store(strconv.Itoa(i), i) } @@ -595,7 +595,7 @@ func TestNewMapPresized_DoesNotShrinkBelowMinTableLen(t *testing.T) { t.Fatalf("table did not grow: %d", stats.RootBuckets) } - for i := 0; i < numEntries; i++ { + for i := 0; i < 2*numEntries; i++ { m.Delete(strconv.Itoa(int(i))) } diff --git a/mapof.go b/mapof.go index b7acb2f..4c4ad08 100644 --- a/mapof.go +++ b/mapof.go @@ -87,9 +87,9 @@ func NewMapOf[K comparable, V any](options ...func(*MapConfig)) *MapOf[K, V] { return NewMapOfWithHasher[K, V](defaultHasher[K](), options...) } -// NewMapOf creates a new MapOf instance configured with the given -// hasher and options. The hash function is used instead of -// the built-in hash function configured when a map is created +// NewMapOfWithHasher creates a new MapOf instance configured with +// the given hasher and options. The hash function is used instead +// of the built-in hash function configured when a map is created // with the NewMapOf function. 
func NewMapOfWithHasher[K comparable, V any]( hasher func(K, uint64) uint64, @@ -109,7 +109,7 @@ func NewMapOfWithHasher[K comparable, V any]( if c.sizeHint <= defaultMinMapTableLen*entriesPerMapOfBucket { table = newMapOfTable[K, V](defaultMinMapTableLen) } else { - tableLen := nextPowOf2(uint32(c.sizeHint / entriesPerMapOfBucket)) + tableLen := nextPowOf2(uint32((float64(c.sizeHint) / entriesPerMapOfBucket) / mapLoadFactor)) table = newMapOfTable[K, V](int(tableLen)) } m.minTableLen = len(table.buckets) diff --git a/mapof_test.go b/mapof_test.go index ffbb85d..e0433a1 100644 --- a/mapof_test.go +++ b/mapof_test.go @@ -647,19 +647,19 @@ func TestNewMapOfPresized(t *testing.T) { assertMapOfCapacity(t, NewMapOf[string, string](WithPresize(0)), DefaultMinMapOfTableCap) assertMapOfCapacity(t, NewMapOfPresized[string, string](-100), DefaultMinMapOfTableCap) assertMapOfCapacity(t, NewMapOf[string, string](WithPresize(-100)), DefaultMinMapOfTableCap) - assertMapOfCapacity(t, NewMapOfPresized[string, string](500), 640) - assertMapOfCapacity(t, NewMapOf[string, string](WithPresize(500)), 640) - assertMapOfCapacity(t, NewMapOfPresized[int, int](1_000_000), 1_310_720) - assertMapOfCapacity(t, NewMapOf[int, int](WithPresize(1_000_000)), 1_310_720) + assertMapOfCapacity(t, NewMapOfPresized[string, string](500), 1280) + assertMapOfCapacity(t, NewMapOf[string, string](WithPresize(500)), 1280) + assertMapOfCapacity(t, NewMapOfPresized[int, int](1_000_000), 2621440) + assertMapOfCapacity(t, NewMapOf[int, int](WithPresize(1_000_000)), 2621440) assertMapOfCapacity(t, NewMapOfPresized[point, point](100), 160) assertMapOfCapacity(t, NewMapOf[point, point](WithPresize(100)), 160) } func TestNewMapOfPresized_DoesNotShrinkBelowMinTableLen(t *testing.T) { const minTableLen = 1024 - const numEntries = minTableLen * EntriesPerMapOfBucket + const numEntries = int(minTableLen * EntriesPerMapOfBucket * MapLoadFactor) m := NewMapOf[int, int](WithPresize(numEntries)) - for i := 0; i < 
numEntries; i++ { + for i := 0; i < 2*numEntries; i++ { m.Store(i, i) } @@ -668,7 +668,7 @@ func TestNewMapOfPresized_DoesNotShrinkBelowMinTableLen(t *testing.T) { t.Fatalf("table did not grow: %d", stats.RootBuckets) } - for i := 0; i < numEntries; i++ { + for i := 0; i < 2*numEntries; i++ { m.Delete(i) }