Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move to 64bit hash and custom hash functions #36

Merged
merged 5 commits into from
Jan 10, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 24 additions & 23 deletions hashmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package immutable

import (
"math"
"math/rand"
"time"

"github.com/object88/immutable/memory"
)
Expand All @@ -26,11 +28,11 @@ type HashMap struct {
buckets []*bucket
lobMask uint32
lobSize uint8
seed uint32
}

const (
// lobSize = 3
bucketCapacity = 1 << 3 //lobSize
bucketCapacity = 1 << 3
loadFactor = 6.0
)

Expand All @@ -56,17 +58,21 @@ func (h *HashMap) Get(key Key) Value {
return nil
}

hashkey := key.Hash()
hashkey := key.Hash(h.seed)

selectedBucket := hashkey & h.lobMask
selectedBucket := hashkey & uint64(h.lobMask)
b := h.buckets[selectedBucket]
maskedHash := hashkey >> h.lobSize

totalEntries := uint64(b.entryCount)

// fmt.Printf("\nlobSize: %d; h.lobMask: 0x%016x\n", h.lobSize, h.lobMask)
// fmt.Printf("hashKey: 0x%016x / selectedBucket: %d / maskedHash: 0x%016x\n", hashkey, selectedBucket, maskedHash)

for b != nil {
for index := uint64(0); index < totalEntries; index++ {
if uint32(b.hobs.Read(index)) == maskedHash && b.entries[index].key == key {
// fmt.Printf("0x%016x <-> 0x%016x :: %s <-> %s\n", b.hobs.Read(index), maskedHash, b.entries[index].key, key)
if b.hobs.Read(index) == maskedHash && b.entries[index].key == key {
return b.entries[index].value
}
}
Expand Down Expand Up @@ -135,11 +141,10 @@ func (h *HashMap) Insert(key Key, value Value) (*HashMap, error) {
}

var result *HashMap
abort := make(chan struct{})
size := h.Size()
if matched {
result = createHashMap(size, h.options)

abort := make(chan struct{})
for kvp := range h.iterate(abort) {
insertValue := kvp.value
if kvp.key == key {
Expand All @@ -150,7 +155,6 @@ func (h *HashMap) Insert(key Key, value Value) (*HashMap, error) {
} else {
size++
result = createHashMap(size, h.options)
abort := make(chan struct{})
for kvp := range h.iterate(abort) {
result.internalSet(kvp.key, kvp.value)
}
Expand Down Expand Up @@ -226,13 +230,7 @@ func (h *HashMap) Size() int {
}

func (h *HashMap) instantiate(size int, contents []*keyValuePair) *BaseStruct {
var options *HashMapOptions
if h != nil {
options = h.options
} else {
options = NewHashMapOptions()
}
hash := createHashMap(size, options)
hash := createHashMap(size, h.options)

for _, v := range contents {
if v != nil {
Expand All @@ -244,26 +242,25 @@ func (h *HashMap) instantiate(size int, contents []*keyValuePair) *BaseStruct {
}

func (h *HashMap) internalSet(key Key, value Value) {
// lobSize := h.lobSize // uint32(memory.PowerOf(h.size))
hobSize := uint32(32 - h.lobSize)
hobSize := uint32(64 - h.lobSize)

hashkey := key.Hash()
selectedBucket := hashkey & h.lobMask
hashkey := key.Hash(h.seed)
selectedBucket := hashkey & uint64(h.lobMask)
// fmt.Printf("At [%s,%s]; h:0x%08x, sb: %d, lob: 0x%08x\n", key, value, hashkey, selectedBucket, hashkey>>h.lobSize)
b := h.buckets[selectedBucket]
if b == nil {
// Create the bucket.
b = createEmptyBucket(memory.LargeBlock, hobSize)
b = createEmptyBucket(h.options.BucketStrategy, hobSize)
h.buckets[selectedBucket] = b
}
for b.entryCount == 8 {
if b.overflow == nil {
b.overflow = createEmptyBucket(memory.LargeBlock, hobSize)
b.overflow = createEmptyBucket(h.options.BucketStrategy, hobSize)
}
b = b.overflow
}
b.entries[b.entryCount] = entry{key, value}
b.hobs.Assign(uint64(b.entryCount), uint64(hashkey>>h.lobSize))
b.hobs.Assign(uint64(b.entryCount), hashkey>>h.lobSize)
b.entryCount++
}

Expand All @@ -274,7 +271,11 @@ func createHashMap(size int, options *HashMapOptions) *HashMap {
lobMask := uint32(^(0xffffffff << lobSize))
buckets := make([]*bucket, initialSize)

return &HashMap{options, initialCount, int(initialSize), buckets, lobMask, lobSize}
src := rand.NewSource(time.Now().UnixNano())
random := rand.New(src)
seed := uint32(random.Int31())

return &HashMap{options, initialCount, int(initialSize), buckets, lobMask, lobSize, seed}
}

func createEmptyBucket(blockSize memory.BlockSize, hobSize uint32) *bucket {
Expand Down
4 changes: 2 additions & 2 deletions hashmap_customkey_badhash_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ type MyBadKey struct {
value int
}

func (k MyBadKey) Hash() uint32 {
func (k MyBadKey) Hash(seed uint32) uint64 {
if k.value%2 == 0 {
return 0x0
}
return 0xffffffff
return 0xffffffffffffffff
}

func (k MyBadKey) String() string {
Expand Down
4 changes: 2 additions & 2 deletions hashmap_customkey_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ type MyKey struct {
value int
}

func (k MyKey) Hash() uint32 {
return uint32(k.value)
func (k MyKey) Hash(seed uint32) uint64 {
return uint64(k.value)
}

func (k MyKey) String() string {
Expand Down
4 changes: 2 additions & 2 deletions hashmap_gostringer.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ func (h *HashMap) GoString() string {
buffer.WriteString(fmt.Sprintf(" entryCount: %d\n", b.entryCount))
buffer.WriteString(" entries: [\n")
for b != nil {
for i := uint32(0); i < uint32(b.entryCount); i++ {
buffer.WriteString(fmt.Sprintf(" [0x%08x,%s] -> %s\n", b.hobs.Read(uint64(i)), b.entries[i].key, b.entries[i].value))
for i := uint64(0); i < uint64(b.entryCount); i++ {
buffer.WriteString(fmt.Sprintf(" [0x%016x,%s] -> %s\n", b.hobs.Read(i), b.entries[i].key, b.entries[i].value))
}

b = b.overflow
Expand Down
5 changes: 3 additions & 2 deletions hashmap_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ func Test_Hashmap(t *testing.T) {
if original.Size() != len(data) {
t.Fatalf("Incorrect size")
}
fmt.Println(original.String())
for k, v := range data {
result := original.Get(k)
if result != v {
Expand Down Expand Up @@ -303,8 +302,10 @@ func Test_Hashmap_ReadAndWriteLargeDataSet(t *testing.T) {
}

original := NewHashMap(contents, nil)
for k, v := range contents {
for i := 0; i < max; i++ {
k := IntKey(i)
result := original.Get(k)
v := contents[k]
if result != v {
t.Fatalf("At %s; expected %d, got %d\n", k, v, result)
}
Expand Down
45 changes: 37 additions & 8 deletions key.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package immutable

import (
"encoding/binary"
"fmt"
"hash/fnv"
"math/rand"
"time"
)

// IntKey is an integer-based Key
Expand All @@ -12,13 +12,42 @@ type IntKey int
// StringKey is a string-based Key
type StringKey string

// Hash calculates the 32-bit hash
func (k IntKey) Hash() uint32 {
hasher := fnv.New32a()
// // Hash calculates the 32-bit hash
// func (k IntKey) Hash() uint32 {
// hasher := fnv.New32a()
//
// binary.Write(hasher, binary.LittleEndian, uint32(k))
// hash := hasher.Sum32()
// return hash
// }

const m1 = 194865226553
const m2 = 24574600569641

var hashkey [4]uintptr

// init populates the package-level hashkey table once at start-up. A
// math/rand generator seeded from the current wall-clock time supplies the
// values, and the low bit of each one is forced on so every entry is odd.
func init() {
	rng := rand.New(rand.NewSource(time.Now().UnixNano()))
	for slot := range hashkey {
		hashkey[slot] = uintptr(rng.Int63() | 1)
	}
}

// Hash returns a 64-bit hash of k, mixed with the caller-supplied seed and
// the process-wide random value hashkey[0].
//
// All 64 bits of the key take part in the hash. An earlier version masked the
// upper 32 bits of the key and then shifted them left by a further 32, which
// always produced zero, so keys that differed only in their upper half
// collided for every seed. Hash values for keys whose upper 32 bits are zero
// are unchanged by this fix.
func (k IntKey) Hash(seed uint32) uint64 {
	// Start from the seed combined with the random per-process key.
	h := uint64(uintptr(seed) + 8*hashkey[0])

	// Fold in the whole key, low and high halves alike.
	h ^= uint64(k)

	// Multiply, rotate, multiply to spread the input bits across the word.
	h = rotl31(h*m1) * m2
	return h
}

binary.Write(hasher, binary.LittleEndian, uint32(k))
hash := hasher.Sum32()
return hash
// rotl31 rotates x left by 31 bit positions within a 64-bit word: the top 31
// bits wrap around to become the lowest 31 bits of the result.
func rotl31(x uint64) uint64 {
	const shift = 31

	upper := x << shift          // low 33 bits of x moved to the top
	wrapped := x >> (64 - shift) // high 31 bits of x moved to the bottom
	return upper | wrapped
}

func (k IntKey) String() string {
Expand Down
2 changes: 0 additions & 2 deletions memory/constants.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
package memory

const (
// allUint32bits = ^uint32(0)
bitsInExtraLargeBlock = 64
bitsInLargeBlock = 32
bitsInSmallBlock = 8
// bitsInBlock = uint32(unsafe.Sizeof(uint32(0)) * bitsInSmallBlock)
)
63 changes: 31 additions & 32 deletions memory/memory32.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,45 +10,46 @@ type Memories32 struct {

// Assign sets a value to the internal memory at the given index
func (m *Memories32) Assign(index uint64, value uint64) {
bitsRemaining := uint64(m.bitsPerEntry)
offset := bitsRemaining * index
bitsRemaining := m.bitsPerEntry
offset := bitsRemaining * uint32(index)
byteOffset := offset / bitsInLargeBlock
bitOffset := offset % bitsInLargeBlock

// fmt.Printf("\nAssigning %032b to index %d\n", value, index)
// fmt.Printf("byteOffset: %d, bitOffset: %d, bitsRemaining: %d\n", byteOffset, bitOffset, bitsRemaining)
// fmt.Printf("\nAssigning %064b to index %d\n", value, index)

writeBitCount := bitsInLargeBlock - bitOffset
if writeBitCount > bitsRemaining {
writeBitCount = bitsRemaining
}
initial := uint64(m.m[byteOffset])
mask := uint64(fullBlock << writeBitCount)
result := (initial & ^(^mask << bitOffset)) | ((value & ^mask) << bitOffset)
m.m[byteOffset] = uint32(result)
// fmt.Printf("byteOffset: %d, bitOffset: %d, bitsRemaining: %d, writeBitCount: %d\n", byteOffset, bitOffset, bitsRemaining, writeBitCount)
initial := m.m[byteOffset]
mask := ^(fullExtraLargeBlock << writeBitCount)
result := uint32(value&mask)<<bitOffset | initial&^((^(fullBlock << writeBitCount))<<bitOffset)
m.m[byteOffset] = result

// fmt.Printf("result at %d: %032b\n", byteOffset, m.m[byteOffset])
// fmt.Printf("result at %d: %032b -> %032b\n", byteOffset, initial, result)

bitsRemaining -= writeBitCount
byteOffset++

if bitsRemaining > 32 {
o := (uint64(m.bitsPerEntry) - bitsRemaining)
result := ((value & (uint64(fullBlock) << o)) >> o)
m.m[byteOffset] = uint32(result)
// fmt.Printf("result at %d: %032b\n", byteOffset, m.m[byteOffset])
if bitsRemaining >= 32 {
o := m.bitsPerEntry - bitsRemaining
result := uint32((value & (fullExtraLargeBlock << o)) >> o)
m.m[byteOffset] = result
// fmt.Printf("result at %d: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx -> %032b\n", byteOffset, result)

bitsRemaining -= 32
byteOffset++
}

if bitsRemaining > 0 {
initial := uint64(m.m[byteOffset])
mask := uint64(fullBlock << bitsRemaining)
result := (initial & mask) | ((value & (^mask << writeBitCount)) >> writeBitCount)
m.m[byteOffset] = uint32(result)
writeBitCount = m.bitsPerEntry - bitsRemaining
initial := m.m[byteOffset]
mask := fullExtraLargeBlock << bitsRemaining
result := (initial & (fullBlock << bitsRemaining)) | uint32((value&((^mask)<<writeBitCount))>>writeBitCount)
m.m[byteOffset] = result

// fmt.Printf("result at %d: %032b\n", byteOffset, m.m[byteOffset])
// fmt.Printf("result at %d: %032b -> %032b\n", byteOffset, initial, result)
}
}

Expand All @@ -65,27 +66,25 @@ func (m *Memories32) Read(index uint64) (result uint64) {
if readBitCount > bitsRemaining {
readBitCount = bitsRemaining
}
initial := uint64(m.m[byteOffset])
mask := uint64(^(fullBlock << readBitCount)) << bitOffset
initial := m.m[byteOffset]
mask := ^(fullBlock << readBitCount) << bitOffset
result = uint64((initial & mask) >> bitOffset)

bitsRemaining -= readBitCount
byteOffset++

if bitsRemaining > 0 {
readBitCount = bitsRemaining
if readBitCount > bitsInLargeBlock {
readBitCount = bitsInLargeBlock
}
// fmt.Printf("--> %064b; %d; %d\n", result, bitsRemaining, readBitCount)
initial := uint64(m.m[byteOffset+1])
result |= ((initial & uint64(^(fullBlock << readBitCount))) << (uint64(m.bitsPerEntry) - bitsRemaining))
bitsRemaining -= readBitCount
if bitsRemaining >= 32 {
// fmt.Printf("--> %064b; %d; %d\n", result, bitsRemaining, 32)
initial := m.m[byteOffset]
result |= (uint64(initial) << (uint64(m.bitsPerEntry) - bitsRemaining))
bitsRemaining -= 32
byteOffset++
}

if bitsRemaining > 0 {
initial := uint64(m.m[byteOffset+2])
initial := m.m[byteOffset]
// fmt.Printf("--> %064b; %d\n", result, bitsRemaining)
result |= ((initial & uint64(^(fullBlock << bitsRemaining))) << (uint64(m.bitsPerEntry) - bitsRemaining))
result |= uint64(initial&(fullBlock>>(32-bitsRemaining))) << (uint64(m.bitsPerEntry) - bitsRemaining)
}
// fmt.Printf("--> %064b\n", result)

Expand Down
Loading