-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathhash_table_expected.py
256 lines (224 loc) · 9.15 KB
/
hash_table_expected.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
import random, sys, time
###########################################################################
# #
# Implement a hash table from scratch! (⑅•ᴗ•⑅) #
# #
# Please do not use Python's dictionary or Python's collections library. #
# The goal is to implement the data structure yourself. #
# #
###########################################################################
# Hash function.
#
# |key|: string
# Return value: a hash value
def calculate_hash(key):
    """Return a non-negative integer hash of the string |key|.

    The key is read as a base-128 number whose digits are the code points of
    its characters. Because each character is weighted by its position,
    anagrams such as "abc" and "cba" get different hash values, which a plain
    sum of code points would not achieve.

    |key|: string
    Return value: an integer in the range [0, 2**64 - 59).
    """
    assert type(key) == str
    # Reduce after every step so the running value stays a bounded integer.
    # Without the reduction the value gains 7 bits per character and long keys
    # pay for ever-larger big-integer arithmetic. Keys of up to nine ASCII
    # characters never reach the modulus, so their hashes are the plain
    # base-128 value.
    modulus = (1 << 64) - 59  # The largest prime below 2**64.
    hash_value = 0
    for char in key:
        hash_value = (hash_value * 128 + ord(char)) % modulus
    return hash_value
# An item object that represents one key - value pair in the hash table.
class Item:
    """One key - value pair, stored as a node of a bucket's linked list.

    |key|: The key of the item. The key must be a string.
    |value|: The value of the item.
    |next|: The item that follows this one in the linked list, or None when
            this item is the last one.
    """

    def __init__(self, key, value, next):
        # Reject non-string keys before any attribute is set.
        assert type(key) == str
        self.key, self.value, self.next = key, value, next
# The main data structure of the hash table that stores key - value pairs.
# The key must be a string. The value can be any type.
#
# |self.bucket_size|: The bucket size.
# |self.buckets|: An array of the buckets. self.buckets[hash % self.bucket_size]
# stores a linked list of items whose hash value is |hash|.
# |self.item_count|: The total number of items in the hash table.
class HashTable:
    """Hash table with separate chaining that maps string keys to values.

    Every bucket holds the head of a singly linked list of Item objects (or
    None when the bucket is empty). The bucket array is resized by
    rehash_if_needed() after each successful insertion or deletion.

    |self.bucket_size|: The current number of buckets.
    |self.buckets|: The bucket array. self.buckets[hash % self.bucket_size]
                    stores a linked list of items whose hash value is |hash|.
    |self.item_count|: The total number of items in the hash table.
    """

    # Initial bucket count, and the floor below which the table never shrinks.
    # A prime number is chosen to reduce hash conflicts. It must stay below
    # 100 so that check_size() accepts a nearly empty table.
    MIN_BUCKET_SIZE = 97
    # Expand once item_count reaches this fraction of bucket_size.
    GROW_LOAD_FACTOR = 0.7
    # Shrink once item_count falls to this fraction of bucket_size.
    SHRINK_LOAD_FACTOR = 0.3

    def __init__(self):
        """Initialize an empty hash table with MIN_BUCKET_SIZE buckets."""
        self.bucket_size = self.MIN_BUCKET_SIZE
        self.buckets = [None] * self.bucket_size
        self.item_count = 0

    def put(self, key, value):
        """Put an item to the hash table, updating the value if |key| exists.

        |key|: The key of the item. The key must be a string.
        |value|: The value of the item.
        Return value: True if a new item is added. False if the key already
                      exists and the value is updated.
        """
        assert type(key) == str
        self.check_size()  # Note: Don't remove this code.
        bucket_index = calculate_hash(key) % self.bucket_size
        item = self.buckets[bucket_index]
        while item is not None:
            if item.key == key:
                item.value = value
                return False
            item = item.next
        # The key is new: prepend it to the bucket's linked list.
        self.buckets[bucket_index] = Item(key, value,
                                          self.buckets[bucket_index])
        self.item_count += 1
        # Rehash the hash table if needed.
        self.rehash_if_needed()
        return True

    def get(self, key):
        """Get an item from the hash table.

        |key|: The key. The key must be a string.
        Return value: If the item is found, (the value of the item, True) is
                      returned. Otherwise, (None, False) is returned.
        """
        assert type(key) == str
        self.check_size()  # Note: Don't remove this code.
        bucket_index = calculate_hash(key) % self.bucket_size
        item = self.buckets[bucket_index]
        while item is not None:
            if item.key == key:
                return (item.value, True)
            item = item.next
        return (None, False)

    def delete(self, key):
        """Delete an item from the hash table.

        |key|: The key. The key must be a string.
        Return value: True if the item is found and deleted successfully.
                      False otherwise.
        """
        assert type(key) == str
        self.check_size()  # Note: Don't remove this code.
        bucket_index = calculate_hash(key) % self.bucket_size
        prev_item = None
        item = self.buckets[bucket_index]
        while item is not None:
            if item.key == key:
                if prev_item is not None:
                    # Unlink from the middle or the end of the list.
                    prev_item.next = item.next
                else:
                    # The item is the head, so the bucket points past it.
                    self.buckets[bucket_index] = item.next
                self.item_count -= 1
                # Rehash the hash table if needed.
                self.rehash_if_needed()
                return True
            prev_item = item
            item = item.next
        return False

    def rehash_if_needed(self):
        """Resize the bucket array when the table is too full or too empty.

        The table roughly doubles when it is 70% or more used and roughly
        halves when it is 30% or less used, but it never shrinks below
        MIN_BUCKET_SIZE. New bucket sizes are always odd.
        """
        if self.item_count >= self.bucket_size * self.GROW_LOAD_FACTOR:
            # Make the new bucket size an odd number to reduce hash conflicts.
            new_bucket_size = 2 * self.bucket_size + 1
        elif self.item_count <= self.bucket_size * self.SHRINK_LOAD_FACTOR:
            # "| 1" forces the halved size to be odd: an even size, and in
            # particular a power of two, would make the base-128 hash depend
            # mostly on the last characters of the key.
            new_bucket_size = max((self.bucket_size // 2) | 1,
                                  self.MIN_BUCKET_SIZE)
            # Already at the floor: skip the rehash instead of rebuilding the
            # table on every operation while it is nearly empty.
            if new_bucket_size >= self.bucket_size:
                return
        else:
            return
        # Move all items to the new buckets by relinking the existing nodes.
        new_buckets = [None] * new_bucket_size
        for head in self.buckets:
            item = head
            while item is not None:
                # Remember the successor before |item.next| is overwritten.
                next_item = item.next
                bucket_index = calculate_hash(item.key) % new_bucket_size
                item.next = new_buckets[bucket_index]
                new_buckets[bucket_index] = item
                item = next_item
        # Update the buckets to the new buckets.
        self.bucket_size = new_bucket_size
        self.buckets = new_buckets

    def size(self):
        """Return the total number of items in the hash table."""
        return self.item_count

    # Check that the hash table has a "reasonable" bucket size.
    # The bucket size is judged "reasonable" if it is smaller than 100 or
    # the buckets are 30% or more used.
    #
    # Note: Don't change this function.
    def check_size(self):
        assert (self.bucket_size < 100 or
                self.item_count >= self.bucket_size * 0.3)
# Test the functional behavior of the hash table.
def functional_test():
    """Exercise put / get / delete / size on a fresh hash table.

    Every step is checked with an assertion, and "Functional tests passed!"
    is printed when all of them hold.
    """
    table = HashTable()

    # A single insertion followed by a lookup.
    assert table.put("aaa", 1) == True
    assert table.get("aaa") == (1, True)
    assert table.size() == 1

    # More insertions; present keys are found and absent keys are not.
    for key, value in (("bbb", 2), ("ccc", 3), ("ddd", 4)):
        assert table.put(key, value) == True
    for key, value in (("aaa", 1), ("bbb", 2), ("ccc", 3), ("ddd", 4)):
        assert table.get(key) == (value, True)
    for missing in ("a", "aa", "aaaa"):
        assert table.get(missing) == (None, False)
    assert table.size() == 4

    # Putting an existing key updates the value without changing the size.
    assert table.put("aaa", 11) == False
    assert table.get("aaa") == (11, True)
    assert table.size() == 4

    # Deleting a key removes it.
    assert table.delete("aaa") == True
    assert table.get("aaa") == (None, False)
    assert table.size() == 3

    # Deleting absent keys fails; deleting the remaining keys empties the
    # table.
    for missing in ("a", "aa", "aaa", "aaaa"):
        assert table.delete(missing) == False
    for key in ("ddd", "ccc", "bbb"):
        assert table.delete(key) == True
    for key in ("aaa", "bbb", "ccc", "ddd"):
        assert table.get(key) == (None, False)
    assert table.size() == 0

    # Keys made of the same characters in different orders are kept apart.
    anagrams = ("abc", "acb", "bac", "bca", "cab", "cba")
    for value, key in enumerate(anagrams, start=1):
        assert table.put(key, value) == True
    for value, key in enumerate(anagrams, start=1):
        assert table.get(key) == (value, True)
    assert table.size() == 6
    for key in ("abc", "cba", "bac", "bca", "acb", "cab"):
        assert table.delete(key) == True
    assert table.size() == 0

    print("Functional tests passed!")
# Test the performance of the hash table.
def performance_test(iterations=100, items_per_iteration=10000):
    """Time batches of put / get calls, then delete everything again.

    For each iteration the random generator is seeded with the iteration
    number, |items_per_iteration| random keys are inserted, the generator is
    re-seeded so that the same keys are looked up, and the elapsed time of the
    batch is printed as "<iteration> <seconds>". Afterwards the same key
    sequences are replayed to delete every item, and the table is checked to
    be empty.

    |iterations|: The number of timed batches.
    |items_per_iteration|: The number of keys inserted and looked up per
                           batch.
    """
    hash_table = HashTable()
    for iteration in range(iterations):
        # perf_counter() is a monotonic, high-resolution clock. time.time()
        # is a wall clock that can jump when the system time is adjusted.
        begin = time.perf_counter()
        random.seed(iteration)
        for _ in range(items_per_iteration):
            rand = random.randint(0, 100000000)
            hash_table.put(str(rand), str(rand))
        # Re-seed so that exactly the keys inserted above are looked up.
        random.seed(iteration)
        for _ in range(items_per_iteration):
            rand = random.randint(0, 100000000)
            hash_table.get(str(rand))
        end = time.perf_counter()
        print("%d %.6f" % (iteration, end - begin))

    # Replay every key sequence to remove all items again.
    for iteration in range(iterations):
        random.seed(iteration)
        for _ in range(items_per_iteration):
            rand = random.randint(0, 100000000)
            hash_table.delete(str(rand))

    assert hash_table.size() == 0
    print("Performance tests passed!")
# Run the functional checks first, then the timing run, but only when this
# file is executed as a script.
if __name__ == "__main__":
    for run_suite in (functional_test, performance_test):
        run_suite()