scratch_mlp.py (forked from omar-florez/scratch_mlp)
#!/usr/bin/env python
# @Author: Omar U. Florez
# @Date: October 28, 2017
'''
Train a feed-forward neural network from scratch, using only NumPy for the
math. The script walks step by step through the learning process of the
network on the XOR problem.
'''
import numpy as np

from scratch_mlp import utils

utils.reset_folders()


def load_XOR_data(N=300):
    # Gaussian point cloud; the XOR of the coordinates' sign bits is the label.
    rng = np.random.RandomState(0)
    X = rng.randn(N, 2)
    y = np.array(np.logical_xor(X[:, 0] > 0, X[:, 1] > 0), dtype=int)
    y = np.expand_dims(y, 1)
    y_hot_encoded = []
    for label in y:
        if label == 0:
            y_hot_encoded.append([1, 0])
        else:
            y_hot_encoded.append([0, 1])
    return X, np.array(y_hot_encoded)
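

# Vectorized alternative to the encoding loop above (a minimal sketch, not
# used by the original script): row i of an identity matrix is the one-hot
# vector for class i.
def one_hot(labels, num_classes=2):
    return np.eye(num_classes, dtype=int)[np.asarray(labels).ravel()]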


def sigmoid(z, first_derivative=False):
    # With first_derivative=True, `z` is expected to be the sigmoid *output* a,
    # since da/dz = a*(1.0-a).
    if first_derivative:
        return z*(1.0-z)
    return 1.0/(1.0+np.exp(-z))


def tanh(z, first_derivative=False):
    # With first_derivative=True, `z` is expected to be the tanh *output* t,
    # since dt/dz = 1.0-t*t.
    if first_derivative:
        return 1.0-z*z
    return np.tanh(z)
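

# Quick numerical sanity check (illustrative sketch, not part of the original
# script): both helpers above take the activation *output* when
# first_derivative=True, so the analytic values should match a central finite
# difference taken with respect to the pre-activation z.
def _check_activation_derivatives(z=0.5, eps=1e-6):
    numeric = (sigmoid(z+eps) - sigmoid(z-eps))/(2.0*eps)
    assert abs(sigmoid(sigmoid(z), first_derivative=True) - numeric) < 1e-6
    numeric = (tanh(z+eps) - tanh(z-eps))/(2.0*eps)
    assert abs(tanh(tanh(z), first_derivative=True) - numeric) < 1e-6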


def inference(data, weights):
    h1 = sigmoid(np.matmul(data, weights[0]))
    logits = np.matmul(h1, weights[1])
    # Subtract the row-wise max before exponentiating: softmax is invariant to
    # the shift and this avoids overflow. The sigmoid applied to the output
    # layer during training is monotonic, so skipping it here keeps the argmax.
    logits -= np.max(logits, axis=1, keepdims=True)
    probs = np.exp(logits)/np.sum(np.exp(logits), axis=1, keepdims=True)
    return np.argmax(probs, axis=1)
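
# Example usage (illustrative only; the real weights are learned in run()
# below, and the zero weights here are a hypothetical placeholder):
#   X, y = load_XOR_data(N=4)
#   preds = inference(X, [np.zeros((2, 10)), np.zeros((10, 2))])  #shape: (4,)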


def run():
    # Size of the minibatch (use int(X.shape[0]) for full-batch training):
    N = 50
    X, y = load_XOR_data(N=300)
    input_dim = int(X.shape[1])
    hidden_dim = 10
    output_dim = 2
    num_epochs = 1000000
    learning_rate = 1e-3
    reg_coeff = 1e-6
    losses = []
    accuracies = []

    #---------------------------------------------------------------------------------------------------------------
    # Initialize weights uniformly in [-1, 1):
    np.random.seed(2017)
    w1 = 2.0*np.random.random((input_dim, hidden_dim))-1.0      #w1: (input_dim, hidden_dim)
    w2 = 2.0*np.random.random((hidden_dim, output_dim))-1.0     #w2: (hidden_dim, output_dim)

    # Calibrating variances with 1/sqrt(fan_in):
    w1 /= np.sqrt(input_dim)
    w2 /= np.sqrt(hidden_dim)
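    # Context (an aside, not from the original): scaling by 1/sqrt(fan_in)
    # keeps the pre-activation variance roughly constant across layers; a
    # common Gaussian equivalent would be, e.g.,
    #   w1 = np.random.randn(input_dim, hidden_dim)/np.sqrt(input_dim)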

    for i in range(num_epochs):
        # Always train on the first N examples; to draw random minibatches
        # instead, shuffle with np.random.shuffle(index).
        index = np.arange(X.shape[0])[:N]

        #---------------------------------------------------------------------------------------------------------------
        # Forward step:
        h1 = sigmoid(np.matmul(X[index], w1))           #(N, hidden_dim)
        h2 = sigmoid(np.matmul(h1, w2))                 #(N, 2)

        #---------------------------------------------------------------------------------------------------------------
        # Loss function: mean squared error plus Ridge (L2) regularization
        L = np.square(y[index]-h2).sum()/(2*N) + reg_coeff*(np.square(w1).sum()+np.square(w2).sum())/(2*N)
        losses.append([i, L])

        #---------------------------------------------------------------------------------------------------------------
        # Backward step (error recursion: e_l = (W_l e_{l+1}) * f'(z_l)).
        # Output layer: dL/dw2 = dL/dh2 * dh2/dz2 * dz2/dw2
        # (the 1/N factor of the loss is folded into the learning rate)
        dL_dh2 = -(y[index] - h2)                       #(N, 2)
        dh2_dz2 = sigmoid(h2, first_derivative=True)    #(N, 2)
        dz2_dw2 = h1                                    #(N, hidden_dim)
        # Gradient for w2: (hidden_dim, N) x ((N, 2)*(N, 2)). The Ridge term
        # differentiates to reg_coeff*w2 (elementwise), not a summed scalar.
        dL_dw2 = dz2_dw2.T.dot(dL_dh2*dh2_dz2) + reg_coeff*w2

        # Hidden layer: dL/dw1 = dL/dh1 * dh1/dz1 * dz1/dw1
        #   dL/dh1 = dL/dz2 * dz2/dh1
        #   dL/dz2 = dL/dh2 * dh2/dz2
        dL_dz2 = dL_dh2 * dh2_dz2                       #(N, 2)
        dz2_dh1 = w2                                    #z2 = h1*w2
        dL_dh1 = dL_dz2.dot(dz2_dh1.T)                  #(N, 2)x(2, hidden_dim)=(N, hidden_dim)
        dh1_dz1 = sigmoid(h1, first_derivative=True)    #(N, hidden_dim)
        dz1_dw1 = X[index]                              #(N, 2)
        # Gradient for w1: (2, N) x ((N, hidden_dim)*(N, hidden_dim))
        dL_dw1 = dz1_dw1.T.dot(dL_dh1*dh1_dz1) + reg_coeff*w1

        #---------------------------------------------------------------------------------------------------------------
        # Weight updates (vanilla gradient descent):
        w2 += -learning_rate*dL_dw2
        w1 += -learning_rate*dL_dw1
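        # A common variant (not used in this script) adds momentum, with
        # hypothetical velocity buffers v1, v2 (zeros, created before the
        # loop) and a decay mu of about 0.9:
        #   v2 = mu*v2 - learning_rate*dL_dw2; w2 += v2
        #   v1 = mu*v1 - learning_rate*dL_dw1; w1 += v1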

        # Evaluate on the full dataset every iteration (restore the commented
        # condition to evaluate less often):
        if True:  #(i+1)%1000==0:
            y_pred = inference(X, [w1, w2])
            y_actual = np.argmax(y, axis=1)
            accuracy = np.sum(np.equal(y_pred, y_actual))/len(y_actual)
            accuracies.append([i, accuracy])

            if (i+1) % 10000 == 0:
                print('Epoch %d\tLoss: %f Average L1 error: %f Accuracy: %f' % (i, L, np.mean(np.abs(dL_dh2)), accuracy))

                save_filepath = './scratch_mlp/plots/boundary/image_%d.png' % i
                text = 'Batch #: %d Accuracy: %.2f Loss value: %.2f' % (i, accuracy, L)
                utils.plot_decision_boundary(X, y_actual, lambda x: inference(x, [w1, w2]),
                                             save_filepath=save_filepath, text=text)

                save_filepath = './scratch_mlp/plots/loss/image_%d.png' % i
                utils.plot_function(losses, save_filepath=save_filepath, ylabel='Loss', title='Loss estimation')

                save_filepath = './scratch_mlp/plots/accuracy/image_%d.png' % i
                utils.plot_function(accuracies, save_filepath=save_filepath, ylabel='Accuracy', title='Accuracy estimation')
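

# A finite-difference gradient check (a minimal sketch, not part of the
# original script). `loss_fn` is an assumed zero-argument closure that
# recomputes the scalar loss after `w` has been perturbed in place; its output
# can be compared against the analytic dL_dw1/dL_dw2 computed in run().
def numerical_gradient(loss_fn, w, eps=1e-5):
    grad = np.zeros_like(w)
    it = np.nditer(w, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        orig = w[idx]
        w[idx] = orig + eps
        loss_plus = loss_fn()
        w[idx] = orig - eps
        loss_minus = loss_fn()
        w[idx] = orig                                   #restore the weight
        grad[idx] = (loss_plus - loss_minus)/(2.0*eps)
        it.iternext()
    return grad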


if __name__ == '__main__':
    run()