-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathfh_am_test.py
97 lines (72 loc) · 3.15 KB
/
fh_am_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
"""
Test the fly_hash encoder and the associative memory by measuring MNIST retrieval accuracy.
We get >91% accuracy using the first 20k train digits.
This is just a poor replacement for a k-nearest-neighbor (kNN) classifier.
Accuracy losses may be caused by both FHEncoder and associative memory overlaps.
"""
from sdr_mem2d import SDRMap
from fly_hash_encoder import FHEncoder
from load_mnist_data import x_train, x_test, y_train, y_test, normalize
# According to the author's note on the project's `normalize` helper, every image
# comes back flattened and scaled so that its values sum to 1.0.
# The train set is normalized first, then the test set (same call order as before).
x_train, x_test = normalize(x_train), normalize(x_test)
import numpy as np
from time import time
# Range [istart, iend) of x_train rows that will be stored in the memory.
istart = 0
ilen = 60000
# Smaller alternative run: istart, ilen = 0, 20000
iend = istart + ilen

# Sizes passed to SDRMap (sdr_size, slot_size); SDR_size is also the encoder's sdr_size.
SDR_size, SLOT_size = 2048, 112

# Number of bits used per SDR: first when storing train data in the memory,
# second when querying the memory. Both are currently the same.
store_sdr_len = query_sdr_len = 32

# How many output pixels are touched by every input pixel.
pixel_spread = 200
# Report the chosen configuration, then create the associative memory (SDR map).
print(f"Initialize SDRMap sdr_size={SDR_size}, slot_size={SLOT_size}")
print(f"Will use sdr lengths for storage: {store_sdr_len}; for query: {query_sdr_len}")
smap = SDRMap(sdr_size=SDR_size, slot_size=SLOT_size)

# Create the FlyHash encoder. Per the author's note it converts a list of MNIST
# images into a list of SDRs of size SDR_size.
print(f"Generate Flyhash encoder, spread={pixel_spread}...")
hasher = FHEncoder(sdr_size=SDR_size, random_seed=3, spread=pixel_spread)
print(f"hasher initialised, we use it to convert {ilen} x_train images to SDRs")

# Encode the selected train images and report the elapsed wall-clock time.
encode_started = time()
train_images = x_train[istart:iend]
sdrs = hasher.compute_sdrs(train_images, sdr_len=store_sdr_len)
encode_ms = int((time() - encode_started) * 1000)
print(f"{ilen} sdrs computed in {encode_ms}ms")
print(f"Training sdrs.shape:{sdrs.shape}, dtype:{sdrs.dtype}")
# Every stored SDR gets an integer ID that packs its label and its row in x_train:
#     ID = 10000000 + 100 * row + digit
# Example: if y_train[15632] == 7 the ID is 11563207, read as _NNNNN_D where
# '_' positions are ignored, NNNNN is the row number in x_train and D is the digit.
row_numbers = np.arange(istart, iend)
sdr_ids = 10000000 + 100 * row_numbers + y_train[istart:iend]

# Write the (ID, SDR) pairs into the associative memory and time the operation.
print(f"Begin storing {ilen} SDRs in associative memory (a.k.a sdr map)")
store_started = time()
smap.store(sdr_ids, sdrs)
store_ms = int(1000 * (time() - store_started))
print(f"sdrs stored in {store_ms}ms")
# End of the "training" phase.
# Testing phase: encode the test images and look them up in the memory.
xtest, ytest = x_test, y_test
print("Begin querrying memory map with x_test")
query_started = time()
# The timed region covers both the SDR encoding and the memory query.
sdrs = hasher.compute_sdrs(xtest, sdr_len=query_sdr_len)
# NOTE(review): `first=8` presumably caps each response at the 8 best-matching
# stored IDs - confirm against SDRMap.query.
idlists, idcounts = smap.query(sdrs, first=8)
query_ms = int((time() - query_started) * 1000)
print(f"Associative query {len(idlists)} done in {query_ms}ms")
# Turn every query response into a predicted digit, then score the predictions
# against the test labels and report accuracy and elapsed time.
print("\nNext we retrieve predicted digit numbers from the responses")
t = time()
idresults = []
for idlist, idcount in zip(idlists, idcounts):
    # Weighted vote over the 10 digit classes: each returned ID adds its count to
    # the digit recovered from the ID (ID % 100 restores the digit value).
    votes = np.zeros(10)
    for sdr_id, weight in zip(idlist, idcount):
        votes[sdr_id % 100] += weight
    # NOTE: a response with no IDs leaves all votes at zero, so argmax picks digit 0.
    idresults.append(np.argmax(votes))
idresults = np.array(idresults)
comps = idresults == ytest
t = time() - t
# Derive the percentage from the actual number of test samples; the previous
# `comps.sum()/100` was only a percentage when there were exactly 10000 samples.
total = len(comps)
percent = 100 * comps.sum() / total if total else 0.0
print(f" result len = {total}, positive = {percent}%, computed in {int(t*1000)}ms")